From 035e0e3a1f0be89a7e41035b80c9f53a6b1f94dc Mon Sep 17 00:00:00 2001
From: EricDinging <ericdingg16@outlook.com>
Date: Tue, 29 Aug 2023 23:24:21 -0400
Subject: [PATCH 1/4] Fix update_round_gradient argument order & rename current_grad_weights to last_grad_weights

---
 fedscale/cloud/internal/torch_model_adapter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fedscale/cloud/internal/torch_model_adapter.py b/fedscale/cloud/internal/torch_model_adapter.py
index 813d0869..0d258ec9 100644
--- a/fedscale/cloud/internal/torch_model_adapter.py
+++ b/fedscale/cloud/internal/torch_model_adapter.py
@@ -25,7 +25,7 @@ def set_weights(self, weights: List[np.ndarray]):
         Set the model's weights to the numpy weights array.
         :param weights: numpy weights array
         """
-        current_grad_weights = [param.data.clone() for param in self.model.state_dict().values()]
+        last_grad_weights = [param.data.clone() for param in self.model.state_dict().values()]
         new_state_dict = {
             name: torch.from_numpy(np.asarray(weights[i], dtype=np.float32))
             for i, name in enumerate(self.model.state_dict().keys())
@@ -34,7 +34,7 @@ def set_weights(self, weights: List[np.ndarray]):
         if self.optimizer:
             weights_origin = copy.deepcopy(weights)
             weights = [torch.tensor(x) for x in weights_origin]
-            self.optimizer.update_round_gradient(weights, current_grad_weights, self.model)
+            self.optimizer.update_round_gradient(last_grad_weights, weights, self.model)
 
     def get_weights(self) -> List[np.ndarray]:
         """

From ce8105256b9022ba3349b9c84fd26b7cd6901bc7 Mon Sep 17 00:00:00 2001
From: EricDinging <ericdingg16@outlook.com>
Date: Tue, 29 Aug 2023 23:59:41 -0400
Subject: [PATCH 2/4] Add gradient policy entry in config

---
 benchmark/configs/android/mnn.yml                    | 1 +
 benchmark/configs/android/tflite.yml                 | 1 +
 benchmark/configs/cifar_cpu/cifar_cpu.yml            | 1 +
 benchmark/configs/docker_deploy/cifar_cpu_docker.yml | 1 +
 benchmark/configs/docker_deploy/dry_run_docker.yml   | 3 ++-
 benchmark/configs/docker_deploy/femnist_docker.yml   | 1 +
 benchmark/configs/dry_run/dry_run.yml                | 3 ++-
 benchmark/configs/fedbuff_femnist/conf.yml           | 1 +
 benchmark/configs/femnist/conf.yml                   | 5 +++--
 benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml       | 1 +
 benchmark/configs/k8s_deploy/dry_run_k8s.yml         | 3 ++-
 benchmark/configs/k8s_deploy/femnist_k8s.yml         | 1 +
 benchmark/configs/others/heterofl.yml                | 3 ++-
 benchmark/configs/reddit/reddit.yml                  | 1 +
 benchmark/configs/tf_cifar/tf_cifar.yml              | 1 +
 benchmark/configs/tf_femnist/tf_femnist.yml          | 1 +
 16 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/benchmark/configs/android/mnn.yml b/benchmark/configs/android/mnn.yml
index 363cedcc..884f6d86 100644
--- a/benchmark/configs/android/mnn.yml
+++ b/benchmark/configs/android/mnn.yml
@@ -25,6 +25,7 @@ job_conf:
     - experiment_mode: mobile
     - num_participants: 1                # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - model: linear                      # Need to define the model in aggregator_mnn.py
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - learning_rate: 0.01
     - batch_size: 32
     - input_shape: 32 32 3
diff --git a/benchmark/configs/android/tflite.yml b/benchmark/configs/android/tflite.yml
index a853c2c3..e45b16ce 100644
--- a/benchmark/configs/android/tflite.yml
+++ b/benchmark/configs/android/tflite.yml
@@ -25,6 +25,7 @@ job_conf:
     - experiment_mode: mobile
     - num_participants: 1                # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - model: linear                      # Need to define the model in tf_aggregator.py
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - learning_rate: 0.01
     - batch_size: 32
     - input_shape: 32 32 3
diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml
index d3fea94c..f34fe926 100644
--- a/benchmark/configs/cifar_cpu/cifar_cpu.yml
+++ b/benchmark/configs/cifar_cpu/cifar_cpu.yml
@@ -36,6 +36,7 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 600                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
index 86ec9678..522533fe 100644
--- a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
+++ b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
@@ -55,6 +55,7 @@ job_conf:
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/dry_run_docker.yml b/benchmark/configs/docker_deploy/dry_run_docker.yml
index cf961f0d..322f1865 100644
--- a/benchmark/configs/docker_deploy/dry_run_docker.yml
+++ b/benchmark/configs/docker_deploy/dry_run_docker.yml
@@ -54,7 +54,8 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 20                       # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml
index 4069d362..974ddde9 100644
--- a/benchmark/configs/docker_deploy/femnist_docker.yml
+++ b/benchmark/configs/docker_deploy/femnist_docker.yml
@@ -59,6 +59,7 @@ job_conf:
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 20                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/dry_run/dry_run.yml b/benchmark/configs/dry_run/dry_run.yml
index 7fd97674..c7fa689f 100644
--- a/benchmark/configs/dry_run/dry_run.yml
+++ b/benchmark/configs/dry_run/dry_run.yml
@@ -35,7 +35,8 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 200                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/fedbuff_femnist/conf.yml b/benchmark/configs/fedbuff_femnist/conf.yml
index e5b13d27..beba1a25 100644
--- a/benchmark/configs/fedbuff_femnist/conf.yml
+++ b/benchmark/configs/fedbuff_femnist/conf.yml
@@ -39,6 +39,7 @@ job_conf:
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18                       # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 1000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml
index 89f8c741..f4e149b0 100644
--- a/benchmark/configs/femnist/conf.yml
+++ b/benchmark/configs/femnist/conf.yml
@@ -39,8 +39,9 @@ job_conf:
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
-    - eval_interval: 10                     # How many rounds to run a testing on the testing set
-    - rounds: 1000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
+    - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - eval_interval: 20                     # How many rounds to run a testing on the testing set
+    - rounds: 20                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
     - num_loaders: 2
     - local_steps: 5
diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
index 5ba3179e..b2869b15 100644
--- a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
+++ b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
@@ -37,6 +37,7 @@ job_conf:
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/k8s_deploy/dry_run_k8s.yml b/benchmark/configs/k8s_deploy/dry_run_k8s.yml
index de495bc4..6027e550 100644
--- a/benchmark/configs/k8s_deploy/dry_run_k8s.yml
+++ b/benchmark/configs/k8s_deploy/dry_run_k8s.yml
@@ -35,7 +35,8 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                       # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml
index f38ee48f..ecc74e26 100644
--- a/benchmark/configs/k8s_deploy/femnist_k8s.yml
+++ b/benchmark/configs/k8s_deploy/femnist_k8s.yml
@@ -41,6 +41,7 @@ job_conf:
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/others/heterofl.yml b/benchmark/configs/others/heterofl.yml
index 57b4e39d..3a7f4806 100644
--- a/benchmark/configs/others/heterofl.yml
+++ b/benchmark/configs/others/heterofl.yml
@@ -36,7 +36,8 @@ job_conf:
     - num_participants: 10                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet_heterofl                      # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet_heterofl                      # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 400                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/reddit/reddit.yml b/benchmark/configs/reddit/reddit.yml
index 4bbc37eb..8cf29323 100644
--- a/benchmark/configs/reddit/reddit.yml
+++ b/benchmark/configs/reddit/reddit.yml
@@ -50,6 +50,7 @@ job_conf:
     - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity     # Path of the client trace
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: albert-base-v2                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 30                     # How many rounds to run a testing on the testing set
     - rounds: 5000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml
index cd59bec5..c4382734 100644
--- a/benchmark/configs/tf_cifar/tf_cifar.yml
+++ b/benchmark/configs/tf_cifar/tf_cifar.yml
@@ -36,6 +36,7 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
     - model: resnet50                    # Need to define the model in tf_aggregator.py
     - model_zoo: fedscale-tensorflow-zoo
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5000                # How many rounds to run a testing on the testing set
     - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                     # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml
index eb3b3c5a..3e466533 100644
--- a/benchmark/configs/tf_femnist/tf_femnist.yml
+++ b/benchmark/configs/tf_femnist/tf_femnist.yml
@@ -36,6 +36,7 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist    # Path of the dataset
     - model: resnet50                    # Need to define the model in tf_aggregator.py
     - model_zoo: fedscale-tensorflow-zoo
+#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5000                # How many rounds to run a testing on the testing set
     - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                     # Remove clients w/ less than 21 samples

From afb8b846738dfac2c5a0ef14168ee1e32f3e2625 Mon Sep 17 00:00:00 2001
From: EricDinging <ericdingg16@outlook.com>
Date: Mon, 4 Sep 2023 23:33:08 -0400
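Subject: [PATCH 3/4] Fix device not found in dry_run customized client (derive it from conf.use_cuda / conf.cuda_device instead of conf.device)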

---
 examples/dry_run/customized_client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/dry_run/customized_client.py b/examples/dry_run/customized_client.py
index 4ead6fa6..f2f240c6 100644
--- a/examples/dry_run/customized_client.py
+++ b/examples/dry_run/customized_client.py
@@ -12,14 +12,14 @@
 
 class Customized_Client(TorchClient):
     """Basic client component in Federated Learning"""
-
     def train(self, client_data, model, conf):
         """We flip the label of the malicious client"""
+        device = conf.cuda_device if conf.use_cuda else torch.device(
+            'cpu')
+
         client_id = conf.client_id
 
         logging.info(f"Start to train (CLIENT: {client_id}) ...")
-        device = conf.device
-
         model = model.to(device=device)
         model.train()
 

From 5aa9492104ac472d158f9f305860c2c6446131ae Mon Sep 17 00:00:00 2001
From: EricDinging <ericdingg16@outlook.com>
Date: Mon, 4 Sep 2023 23:45:11 -0400
Subject: [PATCH 4/4] Revert "Add gradient policy entry in config"

This reverts commit ce8105256b9022ba3349b9c84fd26b7cd6901bc7.
---
 benchmark/configs/android/mnn.yml                    | 1 -
 benchmark/configs/android/tflite.yml                 | 1 -
 benchmark/configs/cifar_cpu/cifar_cpu.yml            | 1 -
 benchmark/configs/docker_deploy/cifar_cpu_docker.yml | 1 -
 benchmark/configs/docker_deploy/dry_run_docker.yml   | 3 +--
 benchmark/configs/docker_deploy/femnist_docker.yml   | 1 -
 benchmark/configs/dry_run/dry_run.yml                | 3 +--
 benchmark/configs/fedbuff_femnist/conf.yml           | 1 -
 benchmark/configs/femnist/conf.yml                   | 5 ++---
 benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml       | 1 -
 benchmark/configs/k8s_deploy/dry_run_k8s.yml         | 3 +--
 benchmark/configs/k8s_deploy/femnist_k8s.yml         | 1 -
 benchmark/configs/others/heterofl.yml                | 3 +--
 benchmark/configs/reddit/reddit.yml                  | 1 -
 benchmark/configs/tf_cifar/tf_cifar.yml              | 1 -
 benchmark/configs/tf_femnist/tf_femnist.yml          | 1 -
 16 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/benchmark/configs/android/mnn.yml b/benchmark/configs/android/mnn.yml
index 884f6d86..363cedcc 100644
--- a/benchmark/configs/android/mnn.yml
+++ b/benchmark/configs/android/mnn.yml
@@ -25,7 +25,6 @@ job_conf:
     - experiment_mode: mobile
     - num_participants: 1                # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - model: linear                      # Need to define the model in aggregator_mnn.py
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - learning_rate: 0.01
     - batch_size: 32
     - input_shape: 32 32 3
diff --git a/benchmark/configs/android/tflite.yml b/benchmark/configs/android/tflite.yml
index e45b16ce..a853c2c3 100644
--- a/benchmark/configs/android/tflite.yml
+++ b/benchmark/configs/android/tflite.yml
@@ -25,7 +25,6 @@ job_conf:
     - experiment_mode: mobile
     - num_participants: 1                # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - model: linear                      # Need to define the model in tf_aggregator.py
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - learning_rate: 0.01
     - batch_size: 32
     - input_shape: 32 32 3
diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml
index f34fe926..d3fea94c 100644
--- a/benchmark/configs/cifar_cpu/cifar_cpu.yml
+++ b/benchmark/configs/cifar_cpu/cifar_cpu.yml
@@ -36,7 +36,6 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 600                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
index 522533fe..86ec9678 100644
--- a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
+++ b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
@@ -55,7 +55,6 @@ job_conf:
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/dry_run_docker.yml b/benchmark/configs/docker_deploy/dry_run_docker.yml
index 322f1865..cf961f0d 100644
--- a/benchmark/configs/docker_deploy/dry_run_docker.yml
+++ b/benchmark/configs/docker_deploy/dry_run_docker.yml
@@ -54,8 +54,7 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 20                       # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml
index 974ddde9..4069d362 100644
--- a/benchmark/configs/docker_deploy/femnist_docker.yml
+++ b/benchmark/configs/docker_deploy/femnist_docker.yml
@@ -59,7 +59,6 @@ job_conf:
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 20                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/dry_run/dry_run.yml b/benchmark/configs/dry_run/dry_run.yml
index c7fa689f..7fd97674 100644
--- a/benchmark/configs/dry_run/dry_run.yml
+++ b/benchmark/configs/dry_run/dry_run.yml
@@ -35,8 +35,7 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2# - gradient_policy: yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 200                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/fedbuff_femnist/conf.yml b/benchmark/configs/fedbuff_femnist/conf.yml
index beba1a25..e5b13d27 100644
--- a/benchmark/configs/fedbuff_femnist/conf.yml
+++ b/benchmark/configs/fedbuff_femnist/conf.yml
@@ -39,7 +39,6 @@ job_conf:
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18                       # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 1000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml
index f4e149b0..89f8c741 100644
--- a/benchmark/configs/femnist/conf.yml
+++ b/benchmark/configs/femnist/conf.yml
@@ -39,9 +39,8 @@ job_conf:
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
-    - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
-    - eval_interval: 20                     # How many rounds to run a testing on the testing set
-    - rounds: 20                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
+    - eval_interval: 10                     # How many rounds to run a testing on the testing set
+    - rounds: 1000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
     - num_loaders: 2
     - local_steps: 5
diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
index b2869b15..5ba3179e 100644
--- a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
+++ b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
@@ -37,7 +37,6 @@ job_conf:
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/k8s_deploy/dry_run_k8s.yml b/benchmark/configs/k8s_deploy/dry_run_k8s.yml
index 6027e550..de495bc4 100644
--- a/benchmark/configs/k8s_deploy/dry_run_k8s.yml
+++ b/benchmark/configs/k8s_deploy/dry_run_k8s.yml
@@ -35,8 +35,7 @@ job_conf:
     - num_participants: 4                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet18                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2. Optional entry "gradient_policy: fed-yogi" accepts {"fed-yogi", "fed-prox", "fed-avg"}; "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                       # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml
index ecc74e26..f38ee48f 100644
--- a/benchmark/configs/k8s_deploy/femnist_k8s.yml
+++ b/benchmark/configs/k8s_deploy/femnist_k8s.yml
@@ -41,7 +41,6 @@ job_conf:
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
 #    - model_zoo: fedscale-torch-zoo
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/others/heterofl.yml b/benchmark/configs/others/heterofl.yml
index 3a7f4806..57b4e39d 100644
--- a/benchmark/configs/others/heterofl.yml
+++ b/benchmark/configs/others/heterofl.yml
@@ -36,8 +36,7 @@ job_conf:
     - num_participants: 10                      # Number of participants per round, we use K=100 in our paper, large K will be much slower
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
-    - model: resnet_heterofl                      # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
+    - model: resnet_heterofl                      # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2. Optional entry "gradient_policy: fed-yogi" accepts {"fed-yogi", "fed-prox", "fed-avg"}; "fed-avg" by default
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 400                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/reddit/reddit.yml b/benchmark/configs/reddit/reddit.yml
index 8cf29323..4bbc37eb 100644
--- a/benchmark/configs/reddit/reddit.yml
+++ b/benchmark/configs/reddit/reddit.yml
@@ -50,7 +50,6 @@ job_conf:
     - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity     # Path of the client trace
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: albert-base-v2                            # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 30                     # How many rounds to run a testing on the testing set
     - rounds: 5000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml
index c4382734..cd59bec5 100644
--- a/benchmark/configs/tf_cifar/tf_cifar.yml
+++ b/benchmark/configs/tf_cifar/tf_cifar.yml
@@ -36,7 +36,6 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
     - model: resnet50                    # Need to define the model in tf_aggregator.py
     - model_zoo: fedscale-tensorflow-zoo
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5000                # How many rounds to run a testing on the testing set
     - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                     # Remove clients w/ less than 21 samples
diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml
index 3e466533..eb3b3c5a 100644
--- a/benchmark/configs/tf_femnist/tf_femnist.yml
+++ b/benchmark/configs/tf_femnist/tf_femnist.yml
@@ -36,7 +36,6 @@ job_conf:
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist    # Path of the dataset
     - model: resnet50                    # Need to define the model in tf_aggregator.py
     - model_zoo: fedscale-tensorflow-zoo
-#   - gradient_policy: fed-yogi                 # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
     - eval_interval: 5000                # How many rounds to run a testing on the testing set
     - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                     # Remove clients w/ less than 21 samples