From 035e0e3a1f0be89a7e41035b80c9f53a6b1f94dc Mon Sep 17 00:00:00 2001 From: EricDinging Date: Tue, 29 Aug 2023 23:24:21 -0400 Subject: [PATCH 1/4] Fix argument order & renaming --- fedscale/cloud/internal/torch_model_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fedscale/cloud/internal/torch_model_adapter.py b/fedscale/cloud/internal/torch_model_adapter.py index 813d0869..0d258ec9 100644 --- a/fedscale/cloud/internal/torch_model_adapter.py +++ b/fedscale/cloud/internal/torch_model_adapter.py @@ -25,7 +25,7 @@ def set_weights(self, weights: List[np.ndarray]): Set the model's weights to the numpy weights array. :param weights: numpy weights array """ - current_grad_weights = [param.data.clone() for param in self.model.state_dict().values()] + last_grad_weights = [param.data.clone() for param in self.model.state_dict().values()] new_state_dict = { name: torch.from_numpy(np.asarray(weights[i], dtype=np.float32)) for i, name in enumerate(self.model.state_dict().keys()) @@ -34,7 +34,7 @@ def set_weights(self, weights: List[np.ndarray]): if self.optimizer: weights_origin = copy.deepcopy(weights) weights = [torch.tensor(x) for x in weights_origin] - self.optimizer.update_round_gradient(weights, current_grad_weights, self.model) + self.optimizer.update_round_gradient(last_grad_weights, weights, self.model) def get_weights(self) -> List[np.ndarray]: """ From ce8105256b9022ba3349b9c84fd26b7cd6901bc7 Mon Sep 17 00:00:00 2001 From: EricDinging Date: Tue, 29 Aug 2023 23:59:41 -0400 Subject: [PATCH 2/4] Add gradient policy entry in config --- benchmark/configs/android/mnn.yml | 1 + benchmark/configs/android/tflite.yml | 1 + benchmark/configs/cifar_cpu/cifar_cpu.yml | 1 + benchmark/configs/docker_deploy/cifar_cpu_docker.yml | 1 + benchmark/configs/docker_deploy/dry_run_docker.yml | 3 ++- benchmark/configs/docker_deploy/femnist_docker.yml | 1 + benchmark/configs/dry_run/dry_run.yml | 3 ++- benchmark/configs/fedbuff_femnist/conf.yml | 1 + benchmark/configs/femnist/conf.yml | 5 +++-- benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml | 1 + benchmark/configs/k8s_deploy/dry_run_k8s.yml | 3 ++- benchmark/configs/k8s_deploy/femnist_k8s.yml | 1 + benchmark/configs/others/heterofl.yml | 3 ++- benchmark/configs/reddit/reddit.yml | 1 + benchmark/configs/tf_cifar/tf_cifar.yml | 1 + benchmark/configs/tf_femnist/tf_femnist.yml | 1 + 16 files changed, 22 insertions(+), 6 deletions(-) diff --git a/benchmark/configs/android/mnn.yml b/benchmark/configs/android/mnn.yml index 363cedcc..884f6d86 100644 --- a/benchmark/configs/android/mnn.yml +++ b/benchmark/configs/android/mnn.yml @@ -25,6 +25,7 @@ job_conf: - experiment_mode: mobile - num_participants: 1 # Number of participants per round, we use K=100 in our paper, large K will be much slower - model: linear # Need to define the model in aggregator_mnn.py +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - learning_rate: 0.01 - batch_size: 32 - input_shape: 32 32 3 diff --git a/benchmark/configs/android/tflite.yml b/benchmark/configs/android/tflite.yml index a853c2c3..e45b16ce 100644 --- a/benchmark/configs/android/tflite.yml +++ b/benchmark/configs/android/tflite.yml @@ -25,6 +25,7 @@ job_conf: - experiment_mode: mobile - num_participants: 1 # Number of participants per round, we use K=100 in our paper, large K will be much slower - model: linear # Need to define the model in tf_aggregator.py +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - learning_rate: 0.01 - batch_size: 32 - input_shape: 32 32 3 diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml index d3fea94c..f34fe926 100644 --- a/benchmark/configs/cifar_cpu/cifar_cpu.yml +++ b/benchmark/configs/cifar_cpu/cifar_cpu.yml @@ -36,6 +36,7 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 600 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml index 86ec9678..522533fe 100644 --- a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml +++ b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml @@ -55,6 +55,7 @@ job_conf: - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/dry_run_docker.yml b/benchmark/configs/docker_deploy/dry_run_docker.yml index cf961f0d..322f1865 100644 --- a/benchmark/configs/docker_deploy/dry_run_docker.yml +++ b/benchmark/configs/docker_deploy/dry_run_docker.yml @@ -54,7 +54,8 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml index 4069d362..974ddde9 100644 --- a/benchmark/configs/docker_deploy/femnist_docker.yml +++ b/benchmark/configs/docker_deploy/femnist_docker.yml @@ -59,6 +59,7 @@ job_conf: - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/dry_run/dry_run.yml b/benchmark/configs/dry_run/dry_run.yml index 7fd97674..c7fa689f 100644 --- a/benchmark/configs/dry_run/dry_run.yml +++ b/benchmark/configs/dry_run/dry_run.yml @@ -35,7 +35,8 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/fedbuff_femnist/conf.yml b/benchmark/configs/fedbuff_femnist/conf.yml index e5b13d27..beba1a25 100644 --- a/benchmark/configs/fedbuff_femnist/conf.yml +++ b/benchmark/configs/fedbuff_femnist/conf.yml @@ -39,6 +39,7 @@ job_conf: - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 1000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml index 89f8c741..f4e149b0 100644 --- a/benchmark/configs/femnist/conf.yml +++ b/benchmark/configs/femnist/conf.yml @@ -39,8 +39,9 @@ job_conf: - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo - - eval_interval: 10 # How many rounds to run a testing on the testing set - - rounds: 1000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds + - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - eval_interval: 20 # How many rounds to run a testing on the testing set + - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples - num_loaders: 2 - local_steps: 5 diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml index 5ba3179e..b2869b15 100644 --- a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml +++ b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml @@ -37,6 +37,7 @@ job_conf: - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/dry_run_k8s.yml b/benchmark/configs/k8s_deploy/dry_run_k8s.yml index de495bc4..6027e550 100644 --- a/benchmark/configs/k8s_deploy/dry_run_k8s.yml +++ b/benchmark/configs/k8s_deploy/dry_run_k8s.yml @@ -35,7 +35,8 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml index f38ee48f..ecc74e26 100644 --- a/benchmark/configs/k8s_deploy/femnist_k8s.yml +++ b/benchmark/configs/k8s_deploy/femnist_k8s.yml @@ -41,6 +41,7 @@ job_conf: - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/others/heterofl.yml b/benchmark/configs/others/heterofl.yml index 57b4e39d..3a7f4806 100644 --- a/benchmark/configs/others/heterofl.yml +++ b/benchmark/configs/others/heterofl.yml @@ -36,7 +36,8 @@ job_conf: - num_participants: 10 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - - model: resnet_heterofl # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet_heterofl # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 400 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/reddit/reddit.yml b/benchmark/configs/reddit/reddit.yml index 4bbc37eb..8cf29323 100644 --- a/benchmark/configs/reddit/reddit.yml +++ b/benchmark/configs/reddit/reddit.yml @@ -50,6 +50,7 @@ job_conf: - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: albert-base-v2 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 30 # How many rounds to run a testing on the testing set - rounds: 5000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml index cd59bec5..c4382734 100644 --- a/benchmark/configs/tf_cifar/tf_cifar.yml +++ b/benchmark/configs/tf_cifar/tf_cifar.yml @@ -36,6 +36,7 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - model: resnet50 # Need to define the model in tf_aggregator.py - model_zoo: fedscale-tensorflow-zoo +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5000 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml index eb3b3c5a..3e466533 100644 --- a/benchmark/configs/tf_femnist/tf_femnist.yml +++ b/benchmark/configs/tf_femnist/tf_femnist.yml @@ -36,6 +36,7 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist # Path of the dataset - model: resnet50 # Need to define the model in tf_aggregator.py - model_zoo: fedscale-tensorflow-zoo +# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5000 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples From afb8b846738dfac2c5a0ef14168ee1e32f3e2625 Mon Sep 17 00:00:00 2001 From: EricDinging Date: Mon, 4 Sep 2023 23:33:08 -0400 Subject: [PATCH 3/4] Fix device not found --- examples/dry_run/customized_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dry_run/customized_client.py b/examples/dry_run/customized_client.py index 4ead6fa6..f2f240c6 100644 --- a/examples/dry_run/customized_client.py +++ b/examples/dry_run/customized_client.py @@ -12,14 +12,14 @@ class Customized_Client(TorchClient): """Basic client component in Federated Learning""" - def train(self, client_data, model, conf): """We flip the label of the malicious client""" + device = conf.cuda_device if conf.use_cuda else torch.device( + 'cpu') + client_id = conf.client_id logging.info(f"Start to train (CLIENT: {client_id}) ...") - device = conf.device - model = model.to(device=device) model.train() From 5aa9492104ac472d158f9f305860c2c6446131ae Mon Sep 17 00:00:00 2001 From: EricDinging Date: Mon, 4 Sep 2023 23:45:11 -0400 Subject: [PATCH 4/4] Revert "Add gradient policy entry in config" This reverts commit ce8105256b9022ba3349b9c84fd26b7cd6901bc7. --- benchmark/configs/android/mnn.yml | 1 - benchmark/configs/android/tflite.yml | 1 - benchmark/configs/cifar_cpu/cifar_cpu.yml | 1 - benchmark/configs/docker_deploy/cifar_cpu_docker.yml | 1 - benchmark/configs/docker_deploy/dry_run_docker.yml | 3 +-- benchmark/configs/docker_deploy/femnist_docker.yml | 1 - benchmark/configs/dry_run/dry_run.yml | 3 +-- benchmark/configs/fedbuff_femnist/conf.yml | 1 - benchmark/configs/femnist/conf.yml | 5 ++--- benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml | 1 - benchmark/configs/k8s_deploy/dry_run_k8s.yml | 3 +-- benchmark/configs/k8s_deploy/femnist_k8s.yml | 1 - benchmark/configs/others/heterofl.yml | 3 +-- benchmark/configs/reddit/reddit.yml | 1 - benchmark/configs/tf_cifar/tf_cifar.yml | 1 - benchmark/configs/tf_femnist/tf_femnist.yml | 1 - 16 files changed, 6 insertions(+), 22 deletions(-) diff --git a/benchmark/configs/android/mnn.yml b/benchmark/configs/android/mnn.yml index 884f6d86..363cedcc 100644 --- a/benchmark/configs/android/mnn.yml +++ b/benchmark/configs/android/mnn.yml @@ -25,7 +25,6 @@ job_conf: - experiment_mode: mobile - num_participants: 1 # Number of participants per round, we use K=100 in our paper, large K will be much slower - model: linear # Need to define the model in aggregator_mnn.py -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - learning_rate: 0.01 - batch_size: 32 - input_shape: 32 32 3 diff --git a/benchmark/configs/android/tflite.yml b/benchmark/configs/android/tflite.yml index e45b16ce..a853c2c3 100644 --- a/benchmark/configs/android/tflite.yml +++ b/benchmark/configs/android/tflite.yml @@ -25,7 +25,6 @@ job_conf: - experiment_mode: mobile - num_participants: 1 # Number of participants per round, we use K=100 in our paper, large K will be much slower - model: linear # Need to define the model in tf_aggregator.py -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - learning_rate: 0.01 - batch_size: 32 - input_shape: 32 32 3 diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml index f34fe926..d3fea94c 100644 --- a/benchmark/configs/cifar_cpu/cifar_cpu.yml +++ b/benchmark/configs/cifar_cpu/cifar_cpu.yml @@ -36,7 +36,6 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 600 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml index 522533fe..86ec9678 100644 --- a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml +++ b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml @@ -55,7 +55,6 @@ job_conf: - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/dry_run_docker.yml b/benchmark/configs/docker_deploy/dry_run_docker.yml index 322f1865..cf961f0d 100644 --- a/benchmark/configs/docker_deploy/dry_run_docker.yml +++ b/benchmark/configs/docker_deploy/dry_run_docker.yml @@ -54,8 +54,7 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml index 974ddde9..4069d362 100644 --- a/benchmark/configs/docker_deploy/femnist_docker.yml +++ b/benchmark/configs/docker_deploy/femnist_docker.yml @@ -59,7 +59,6 @@ job_conf: - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/dry_run/dry_run.yml b/benchmark/configs/dry_run/dry_run.yml index c7fa689f..7fd97674 100644 --- a/benchmark/configs/dry_run/dry_run.yml +++ b/benchmark/configs/dry_run/dry_run.yml @@ -35,8 +35,7 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/fedbuff_femnist/conf.yml b/benchmark/configs/fedbuff_femnist/conf.yml index beba1a25..e5b13d27 100644 --- a/benchmark/configs/fedbuff_femnist/conf.yml +++ b/benchmark/configs/fedbuff_femnist/conf.yml @@ -39,7 +39,6 @@ job_conf: - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 1000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml index f4e149b0..89f8c741 100644 --- a/benchmark/configs/femnist/conf.yml +++ b/benchmark/configs/femnist/conf.yml @@ -39,9 +39,8 @@ job_conf: - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo - - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - - eval_interval: 20 # How many rounds to run a testing on the testing set - - rounds: 20 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds + - eval_interval: 10 # How many rounds to run a testing on the testing set + - rounds: 1000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples - num_loaders: 2 - local_steps: 5 diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml index b2869b15..5ba3179e 100644 --- a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml +++ b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml @@ -37,7 +37,6 @@ job_conf: - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/dry_run_k8s.yml b/benchmark/configs/k8s_deploy/dry_run_k8s.yml index 6027e550..de495bc4 100644 --- a/benchmark/configs/k8s_deploy/dry_run_k8s.yml +++ b/benchmark/configs/k8s_deploy/dry_run_k8s.yml @@ -35,8 +35,7 @@ job_conf: - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml index ecc74e26..f38ee48f 100644 --- a/benchmark/configs/k8s_deploy/femnist_k8s.yml +++ b/benchmark/configs/k8s_deploy/femnist_k8s.yml @@ -41,7 +41,6 @@ job_conf: - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs # - model_zoo: fedscale-torch-zoo -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/others/heterofl.yml b/benchmark/configs/others/heterofl.yml index 3a7f4806..57b4e39d 100644 --- a/benchmark/configs/others/heterofl.yml +++ b/benchmark/configs/others/heterofl.yml @@ -36,8 +36,7 @@ job_conf: - num_participants: 10 # Number of participants per round, we use K=100 in our paper, large K will be much slower - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - - model: resnet_heterofl # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default + - model: resnet_heterofl # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 400 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/reddit/reddit.yml b/benchmark/configs/reddit/reddit.yml index 8cf29323..4bbc37eb 100644 --- a/benchmark/configs/reddit/reddit.yml +++ b/benchmark/configs/reddit/reddit.yml @@ -50,7 +50,6 @@ job_conf: - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: albert-base-v2 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2 -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 30 # How many rounds to run a testing on the testing set - rounds: 5000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml index c4382734..cd59bec5 100644 --- a/benchmark/configs/tf_cifar/tf_cifar.yml +++ b/benchmark/configs/tf_cifar/tf_cifar.yml @@ -36,7 +36,6 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - model: resnet50 # Need to define the model in tf_aggregator.py - model_zoo: fedscale-tensorflow-zoo -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5000 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml index 3e466533..eb3b3c5a 100644 --- a/benchmark/configs/tf_femnist/tf_femnist.yml +++ b/benchmark/configs/tf_femnist/tf_femnist.yml @@ -36,7 +36,6 @@ job_conf: - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist # Path of the dataset - model: resnet50 # Need to define the model in tf_aggregator.py - model_zoo: fedscale-tensorflow-zoo -# - gradient_policy: fed-yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default - eval_interval: 5000 # How many rounds to run a testing on the testing set - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples