From 92e598c16ed524c73417e5d682546fa41b4b1a25 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Sun, 6 Mar 2022 00:42:06 -0300
Subject: [PATCH 01/18] Update sim.ipynb

Correct the validation dataset references in the FlowerClient class (x_val/y_val were being assigned the training data) and set min_eval_clients to 5 so that it matches its comment.
---
 examples/quickstart_simulation/sim.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/quickstart_simulation/sim.ipynb b/examples/quickstart_simulation/sim.ipynb
index 67c5bb9c460a..f3df345603a7 100644
--- a/examples/quickstart_simulation/sim.ipynb
+++ b/examples/quickstart_simulation/sim.ipynb
@@ -86,7 +86,7 @@
         "    def __init__(self, model, x_train, y_train, x_val, y_val) -> None:\n",
         "        self.model = model\n",
         "        self.x_train, self.y_train = x_train, y_train\n",
-        "        self.x_val, self.y_val = x_train, y_train\n",
+        "        self.x_val, self.y_val = x_val, y_val\n",
         "\n",
         "    def get_parameters(self):\n",
         "        return self.model.get_weights()\n",
@@ -181,7 +181,7 @@
         "        fraction_fit=0.1,  # Sample 10% of available clients for training\n",
         "        fraction_eval=0.05,  # Sample 5% of available clients for evaluation\n",
         "        min_fit_clients=10,  # Never sample less than 10 clients for training\n",
-        "        min_eval_clients=10,  # Never sample less than 5 clients for evaluation\n",
+        "        min_eval_clients=5,  # Never sample less than 5 clients for evaluation\n",
         "        min_available_clients=int(NUM_CLIENTS * 0.75),  # Wait until at least 75 clients are available\n",
         ")\n",
         "\n",

From 0770547c5b1806728369061bd3e7e0e3f1978ff4 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Tue, 12 Jul 2022 16:45:29 -0300
Subject: [PATCH 02/18] Change to comply with .fit() tuple requirements

Return the model parameters as a list instead of a tuple so that they pass the sanity check on the return value of the clients' fit method (a tuple of list, int, dict) introduced in the latest version of Flower.
---
 examples/sklearn-logreg-mnist/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/sklearn-logreg-mnist/utils.py b/examples/sklearn-logreg-mnist/utils.py
index efccf97e4a1f..86eae13ddf1f 100644
--- a/examples/sklearn-logreg-mnist/utils.py
+++ b/examples/sklearn-logreg-mnist/utils.py
@@ -12,9 +12,9 @@
 def get_model_parameters(model: LogisticRegression) -> LogRegParams:
     """Returns the paramters of a sklearn LogisticRegression model."""
     if model.fit_intercept:
-        params = (model.coef_, model.intercept_)
+        params = [model.coef_, model.intercept_]
     else:
-        params = (model.coef_,)
+        params = [model.coef_,]
     return params
 
 

From 1eefe7682ceabfd6de48756d7eec1f8daacd9ae7 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 22 Jun 2023 16:53:23 +0200
Subject: [PATCH 03/18] custom metrics example

---
 examples/custom_metrics/README.md        | 82 ++++++++++++++++++++++++
 examples/custom_metrics/client.py        | 60 +++++++++++++++++
 examples/custom_metrics/pyproject.toml   | 19 ++++++
 examples/custom_metrics/requirements.txt |  3 +
 examples/custom_metrics/run.sh           | 15 +++++
 examples/custom_metrics/server.py        | 29 +++++++++
 6 files changed, 208 insertions(+)
 create mode 100644 examples/custom_metrics/README.md
 create mode 100644 examples/custom_metrics/client.py
 create mode 100644 examples/custom_metrics/pyproject.toml
 create mode 100644 examples/custom_metrics/requirements.txt
 create mode 100755 examples/custom_metrics/run.sh
 create mode 100644 examples/custom_metrics/server.py

diff --git a/examples/custom_metrics/README.md b/examples/custom_metrics/README.md
new file mode 100644
index 000000000000..f8dc5b78215e
--- /dev/null
+++ b/examples/custom_metrics/README.md
@@ -0,0 +1,82 @@
+# Flower Example using Custom Metrics
+
+This simple example demonstrate how to calculate custom metrics over multiple clients beyond the traditional ones available in the ML frameworks. In this case, it demonstrate the use of ready-available scikit-learn metrics: accuracy, recall, precision, and f1-score.
+
+Once both the test values (`y_test`) and the predictions (`y_pred`) are available on the client side (`client.py`), other metrics or custom ones are possible to be calculated.
+
+The main takeaways of this implementation are:
+- the use of the `output_dict` on the client side - inside `evaluate` method on `client.py`
+- the use of the `evaluate_metrics_aggregation_fn` - to aggregate the metrics on the server side, part of the `strategy` on `server.py`
+
+This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html).
+
+Using the CIFAR-10 dataset for classification, this is a multi-class classification problem, thus some changes on how to calculate the metrics using `average='micro'` and `np.argmax` is required. For binary classification, this is not required. Also, for unsupervised learning tasks, such as using a deep autoencoder, a custom metric based on reconstruction error could be implemented on client side.
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
+
+```shell
+git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/custom_metrics . && rm -rf flower && cd custom_metrics
+```
+
+This will create a new directory called `custom_metrics` containing the following files:
+
+```shell
+-- pyproject.toml
+-- requirements.txt
+-- client.py
+-- server.py
+-- README.md
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `scikit-learn`, `tensorflow` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Learning with Custom Metrics
+
+Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
+
+```shell
+poetry run python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each:
+
+```shell
+poetry run python3 client.py
+```
+
+Alternatively you can run all of it in one shell as follows:
+
+```shell
+poetry run python3 server.py &
+poetry run python3 client.py &
+poetry run python3 client.py
+```
+
+You will see that Keras is starting a federated training. Have a look to the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-tensorflow.html) for a detailed explanation. You can add `steps_per_epoch=3` to `model.fit()` if you just want to evaluate that everything works without having to wait for the client-side training to finish (this will save you a lot of time during development).
diff --git a/examples/custom_metrics/client.py b/examples/custom_metrics/client.py
new file mode 100644
index 000000000000..acc12d82c274
--- /dev/null
+++ b/examples/custom_metrics/client.py
@@ -0,0 +1,60 @@
+import os
+
+import flwr as fl
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
+
+
+# Make TensorFlow log less verbose
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+
+# Load model and data (MobileNetV2, CIFAR-10)
+model = tf.keras.applications.MobileNetV2((32, 32, 3), classes=10, weights=None)
+model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
+(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
+
+
+# method for extra learning metrics calculation
+def eval_learning(y_test, y_pred):
+    acc = accuracy_score(y_test, y_pred)
+    rec = recall_score(
+        y_test, y_pred, average="micro"
+    )  # average argument required for multi-class
+    prec = precision_score(y_test, y_pred, average="micro")
+    f1 = f1_score(y_test, y_pred, average="micro")
+    return acc, rec, prec, f1
+
+
+# Define Flower client
+class CifarClient(fl.client.NumPyClient):
+    def get_parameters(self, config):
+        return model.get_weights()
+
+    def fit(self, parameters, config):
+        model.set_weights(parameters)
+        model.fit(x_train, y_train, epochs=1, batch_size=32)
+        return model.get_weights(), len(x_train), {}
+
+    def evaluate(self, parameters, config):
+        model.set_weights(parameters)
+        loss, accuracy = model.evaluate(x_test, y_test)
+        y_pred = model.predict(x_test)
+        y_pred = np.argmax(y_pred, axis=1).reshape(
+            -1, 1
+        )  # MobileNetV2 outputs 10 possible classes, argmax returns just the most probable
+
+        acc, rec, prec, f1 = eval_learning(y_test, y_pred)
+        output_dict = {
+            "accuracy": accuracy,  # accuracy from tensorflow model.evaluate
+            "acc": acc,
+            "rec": rec,
+            "prec": prec,
+            "f1": f1,
+        }
+        return loss, len(x_test), output_dict
+
+
+# Start Flower client
+fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=CifarClient())
diff --git a/examples/custom_metrics/pyproject.toml b/examples/custom_metrics/pyproject.toml
new file mode 100644
index 000000000000..97da6041042f
--- /dev/null
+++ b/examples/custom_metrics/pyproject.toml
@@ -0,0 +1,19 @@
+[build-system]
+requires = ["poetry-core>=1.4.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "custom_metrics"
+version = "0.1.0"
+description = "Federated Learning with Flower and Custom Metrics"
+authors = [
+	"The Flower Authors <hello@flower.dev>",
+	"Gustavo Bertoli <>"
+]
+
+[tool.poetry.dependencies]
+python = ">=3.8,<3.11"
+flwr = "^1.0.0"
+scikit-learn = "^1.2.2"
+tensorflow-cpu = {version = "^2.9.1, !=2.11.1", markers="platform_machine == 'x86_64'"}
+tensorflow-macos = {version = "^2.9.1, !=2.11.1", markers="sys_platform == 'darwin' and platform_machine == 'arm64'"}
diff --git a/examples/custom_metrics/requirements.txt b/examples/custom_metrics/requirements.txt
new file mode 100644
index 000000000000..ac4b2a58c220
--- /dev/null
+++ b/examples/custom_metrics/requirements.txt
@@ -0,0 +1,3 @@
+flwr==1.4.0
+scikit-learn==1.2.2
+tensorflow==2.12.0
diff --git a/examples/custom_metrics/run.sh b/examples/custom_metrics/run.sh
new file mode 100755
index 000000000000..c64f362086aa
--- /dev/null
+++ b/examples/custom_metrics/run.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+echo "Starting server"
+python server.py &
+sleep 3  # Sleep for 3s to give the server enough time to start
+
+for i in `seq 0 1`; do
+    echo "Starting client $i"
+    python client.py &
+done
+
+# This will allow you to use CTRL+C to stop all background processes
+trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
+# Wait for all background processes to complete
+wait
diff --git a/examples/custom_metrics/server.py b/examples/custom_metrics/server.py
new file mode 100644
index 000000000000..d888223a05ca
--- /dev/null
+++ b/examples/custom_metrics/server.py
@@ -0,0 +1,29 @@
+import flwr as fl
+import numpy as np
+
+
+def average_metrics(metrics):
+    accuracies_tf = np.mean([metric["accuracy"] for _, metric in metrics])
+    accuracies = np.mean([metric["acc"] for _, metric in metrics])
+    recalls = np.mean([metric["rec"] for _, metric in metrics])
+    precisions = np.mean([metric["prec"] for _, metric in metrics])
+    f1s = np.mean([metric["f1"] for _, metric in metrics])
+
+    return {
+        "accuracy": accuracies_tf,
+        "acc": accuracies,
+        "rec": recalls,
+        "prec": precisions,
+        "f1": f1s,
+    }
+
+
+strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=average_metrics)
+
+
+# Start Flower server
+fl.server.start_server(
+    server_address="0.0.0.0:8080",
+    config=fl.server.ServerConfig(num_rounds=3),
+    strategy=strategy,
+)

From 63ee9898274eaf7e098e899672ec1f618c34bbb5 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Tue, 16 Jan 2024 16:16:50 +0100
Subject: [PATCH 04/18] Format and test ok

---
 .../client.py                                 |  0
 .../pyproject.toml                            |  6 +-
 .../requirements.txt                          |  0
 .../{custom_metrics => custom-metrics}/run.sh |  0
 .../server.py                                 |  0
 examples/custom_metrics/README.md             | 82 -------------------
 6 files changed, 3 insertions(+), 85 deletions(-)
 rename examples/{custom_metrics => custom-metrics}/client.py (100%)
 rename examples/{custom_metrics => custom-metrics}/pyproject.toml (89%)
 rename examples/{custom_metrics => custom-metrics}/requirements.txt (100%)
 rename examples/{custom_metrics => custom-metrics}/run.sh (100%)
 rename examples/{custom_metrics => custom-metrics}/server.py (100%)
 delete mode 100644 examples/custom_metrics/README.md

diff --git a/examples/custom_metrics/client.py b/examples/custom-metrics/client.py
similarity index 100%
rename from examples/custom_metrics/client.py
rename to examples/custom-metrics/client.py
diff --git a/examples/custom_metrics/pyproject.toml b/examples/custom-metrics/pyproject.toml
similarity index 89%
rename from examples/custom_metrics/pyproject.toml
rename to examples/custom-metrics/pyproject.toml
index 97da6041042f..99e62c645610 100644
--- a/examples/custom_metrics/pyproject.toml
+++ b/examples/custom-metrics/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["poetry-core>=1.4.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-name = "custom_metrics"
+name = "custom-metrics"
 version = "0.1.0"
 description = "Federated Learning with Flower and Custom Metrics"
 authors = [
@@ -12,8 +12,8 @@ authors = [
 ]
 
 [tool.poetry.dependencies]
-python = ">=3.8,<3.11"
-flwr = "^1.0.0"
+python = "^3.8"
+flwr = ">=1.0,<2.0"
 scikit-learn = "^1.2.2"
 tensorflow-cpu = {version = "^2.9.1, !=2.11.1", markers="platform_machine == 'x86_64'"}
 tensorflow-macos = {version = "^2.9.1, !=2.11.1", markers="sys_platform == 'darwin' and platform_machine == 'arm64'"}
diff --git a/examples/custom_metrics/requirements.txt b/examples/custom-metrics/requirements.txt
similarity index 100%
rename from examples/custom_metrics/requirements.txt
rename to examples/custom-metrics/requirements.txt
diff --git a/examples/custom_metrics/run.sh b/examples/custom-metrics/run.sh
similarity index 100%
rename from examples/custom_metrics/run.sh
rename to examples/custom-metrics/run.sh
diff --git a/examples/custom_metrics/server.py b/examples/custom-metrics/server.py
similarity index 100%
rename from examples/custom_metrics/server.py
rename to examples/custom-metrics/server.py
diff --git a/examples/custom_metrics/README.md b/examples/custom_metrics/README.md
deleted file mode 100644
index f8dc5b78215e..000000000000
--- a/examples/custom_metrics/README.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# Flower Example using Custom Metrics
-
-This simple example demonstrate how to calculate custom metrics over multiple clients beyond the traditional ones available in the ML frameworks. In this case, it demonstrate the use of ready-available scikit-learn metrics: accuracy, recall, precision, and f1-score.
-
-Once both the test values (`y_test`) and the predictions (`y_pred`) are available on the client side (`client.py`), other metrics or custom ones are possible to be calculated.
-
-The main takeaways of this implementation are:
-- the use of the `output_dict` on the client side - inside `evaluate` method on `client.py`
-- the use of the `evaluate_metrics_aggregation_fn` - to aggregate the metrics on the server side, part of the `strategy` on `server.py`
-
-This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html).
-
-Using the CIFAR-10 dataset for classification, this is a multi-class classification problem, thus some changes on how to calculate the metrics using `average='micro'` and `np.argmax` is required. For binary classification, this is not required. Also, for unsupervised learning tasks, such as using a deep autoencoder, a custom metric based on reconstruction error could be implemented on client side.
-
-## Project Setup
-
-Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
-
-```shell
-git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/custom_metrics . && rm -rf flower && cd custom_metrics
-```
-
-This will create a new directory called `custom_metrics` containing the following files:
-
-```shell
--- pyproject.toml
--- requirements.txt
--- client.py
--- server.py
--- README.md
-```
-
-### Installing Dependencies
-
-Project dependencies (such as `scikit-learn`, `tensorflow` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
-
-#### Poetry
-
-```shell
-poetry install
-poetry shell
-```
-
-Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
-
-```shell
-poetry run python3 -c "import flwr"
-```
-
-If you don't see any errors you're good to go!
-
-#### pip
-
-Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
-
-```shell
-pip install -r requirements.txt
-```
-
-## Run Federated Learning with Custom Metrics
-
-Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
-
-```shell
-poetry run python3 server.py
-```
-
-Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each:
-
-```shell
-poetry run python3 client.py
-```
-
-Alternatively you can run all of it in one shell as follows:
-
-```shell
-poetry run python3 server.py &
-poetry run python3 client.py &
-poetry run python3 client.py
-```
-
-You will see that Keras is starting a federated training. Have a look to the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-tensorflow.html) for a detailed explanation. You can add `steps_per_epoch=3` to `model.fit()` if you just want to evaluate that everything works without having to wait for the client-side training to finish (this will save you a lot of time during development).

From 5e25c3ce7a31782f604c5cea83803c6a71ebfc2f Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Tue, 16 Jan 2024 16:19:14 +0100
Subject: [PATCH 05/18] README

---
 examples/custom-metrics/README.md | 83 +++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 examples/custom-metrics/README.md

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
new file mode 100644
index 000000000000..06490a8175ba
--- /dev/null
+++ b/examples/custom-metrics/README.md
@@ -0,0 +1,83 @@
+# Flower Example using Custom Metrics
+
+This simple example demonstrate how to calculate custom metrics over multiple clients beyond the traditional ones available in the ML frameworks. In this case, it demonstrate the use of ready-available scikit-learn metrics: accuracy, recall, precision, and f1-score.
+
+Once both the test values (`y_test`) and the predictions (`y_pred`) are available on the client side (`client.py`), other metrics or custom ones are possible to be calculated.
+
+The main takeaways of this implementation are:
+
+- the use of the `output_dict` on the client side - inside `evaluate` method on `client.py`
+- the use of the `evaluate_metrics_aggregation_fn` - to aggregate the metrics on the server side, part of the `strategy` on `server.py`
+
+This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html).
+
+Using the CIFAR-10 dataset for classification, this is a multi-class classification problem, thus some changes on how to calculate the metrics using `average='micro'` and `np.argmax` is required. For binary classification, this is not required. Also, for unsupervised learning tasks, such as using a deep autoencoder, a custom metric based on reconstruction error could be implemented on client side.
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
+
+```shell
+git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/custom-metrics . && rm -rf flower && cd custom-metrics
+```
+
+This will create a new directory called `custom-metrics` containing the following files:
+
+```shell
+-- pyproject.toml
+-- requirements.txt
+-- client.py
+-- server.py
+-- README.md
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `scikit-learn`, `tensorflow` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Learning with Custom Metrics
+
+Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
+
+```shell
+poetry run python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each:
+
+```shell
+poetry run python3 client.py
+```
+
+Alternatively you can run all of it in one shell as follows:
+
+```shell
+poetry run python3 server.py &
+poetry run python3 client.py &
+poetry run python3 client.py
+```
+
+You will see that Keras is starting a federated training. Have a look to the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-tensorflow.html) for a detailed explanation. You can add `steps_per_epoch=3` to `model.fit()` if you just want to evaluate that everything works without having to wait for the client-side training to finish (this will save you a lot of time during development).

From 949b4ee81fff510844f2360efb9f4a2645b04c0a Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Wed, 17 Jan 2024 16:42:49 +0100
Subject: [PATCH 06/18] Update examples/custom-metrics/requirements.txt

Co-authored-by: Daniel J. Beutel <daniel@flower.dev>
---
 examples/custom-metrics/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/requirements.txt b/examples/custom-metrics/requirements.txt
index ac4b2a58c220..daa7de82570b 100644
--- a/examples/custom-metrics/requirements.txt
+++ b/examples/custom-metrics/requirements.txt
@@ -1,3 +1,3 @@
-flwr==1.4.0
+flwr>=1.0, <2.0
 scikit-learn==1.2.2
 tensorflow==2.12.0

From 8298aec591f6d026fd41c49a39c605c2189f58cb Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Wed, 17 Jan 2024 16:43:59 +0100
Subject: [PATCH 07/18] Update examples/custom-metrics/client.py

Co-authored-by: Daniel J. Beutel <daniel@flower.dev>
---
 examples/custom-metrics/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/client.py b/examples/custom-metrics/client.py
index acc12d82c274..5fdf49bf201d 100644
--- a/examples/custom-metrics/client.py
+++ b/examples/custom-metrics/client.py
@@ -28,7 +28,7 @@ def eval_learning(y_test, y_pred):
 
 
 # Define Flower client
-class CifarClient(fl.client.NumPyClient):
+class FlowerClient(fl.client.NumPyClient):
     def get_parameters(self, config):
         return model.get_weights()
 

From c5be003c9a4d61f53d03592229eab19ff5ba5f9d Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Wed, 17 Jan 2024 16:47:48 +0100
Subject: [PATCH 08/18] Update to FlowerClient class and added e-mail

---
 examples/custom-metrics/client.py      | 2 +-
 examples/custom-metrics/pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/custom-metrics/client.py b/examples/custom-metrics/client.py
index 5fdf49bf201d..c1def5435801 100644
--- a/examples/custom-metrics/client.py
+++ b/examples/custom-metrics/client.py
@@ -57,4 +57,4 @@ def evaluate(self, parameters, config):
 
 
 # Start Flower client
-fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=CifarClient())
+fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=FlowerClient())
diff --git a/examples/custom-metrics/pyproject.toml b/examples/custom-metrics/pyproject.toml
index 99e62c645610..906035628564 100644
--- a/examples/custom-metrics/pyproject.toml
+++ b/examples/custom-metrics/pyproject.toml
@@ -8,7 +8,7 @@ version = "0.1.0"
 description = "Federated Learning with Flower and Custom Metrics"
 authors = [
 	"The Flower Authors <hello@flower.dev>",
-	"Gustavo Bertoli <>"
+	"Gustavo Bertoli <gubertoli -at- gmail.com>"
 ]
 
 [tool.poetry.dependencies]

From 41043b74fe6921f4633cdc5722802ee336c9a5b8 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Wed, 17 Jan 2024 21:24:53 +0100
Subject: [PATCH 09/18] Using flwr-datasets and tested with pip and poetry

---
 examples/custom-metrics/README.md        | 33 +++++++++++++++++++-----
 examples/custom-metrics/client.py        | 15 +++++++++--
 examples/custom-metrics/pyproject.toml   |  6 ++---
 examples/custom-metrics/requirements.txt |  5 ++--
 4 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
index 06490a8175ba..b67b9ca88abd 100644
--- a/examples/custom-metrics/README.md
+++ b/examples/custom-metrics/README.md
@@ -9,7 +9,7 @@ The main takeaways of this implementation are:
 - the use of the `output_dict` on the client side - inside `evaluate` method on `client.py`
 - the use of the `evaluate_metrics_aggregation_fn` - to aggregate the metrics on the server side, part of the `strategy` on `server.py`
 
-This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html).
+This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html), with the addition of [Flower Datasets](https://flower.dev/docs/datasets/index.html) to retrieve the CIFAR-10.
 
 Using the CIFAR-10 dataset for classification, this is a multi-class classification problem, thus some changes on how to calculate the metrics using `average='micro'` and `np.argmax` is required. For binary classification, this is not required. Also, for unsupervised learning tasks, such as using a deep autoencoder, a custom metric based on reconstruction error could be implemented on client side.
 
@@ -55,6 +55,8 @@ If you don't see any errors you're good to go!
 Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
 
 ```shell
+python -m venv venv
+source venv/bin/activate
 pip install -r requirements.txt
 ```
 
@@ -63,21 +65,40 @@ pip install -r requirements.txt
 Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
 
 ```shell
-poetry run python3 server.py
+python server.py
 ```
 
 Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each:
 
 ```shell
-poetry run python3 client.py
+python client.py
 ```
 
 Alternatively you can run all of it in one shell as follows:
 
 ```shell
-poetry run python3 server.py &
-poetry run python3 client.py &
-poetry run python3 client.py
+python server.py &
+python client.py &
+python client.py
+```
+
+or
+
+```shell
+chmod +x run.sh
+./run.sh
 ```
 
 You will see that Keras is starting a federated training. Have a look to the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-tensorflow.html) for a detailed explanation. You can add `steps_per_epoch=3` to `model.fit()` if you just want to evaluate that everything works without having to wait for the client-side training to finish (this will save you a lot of time during development).
+
+Running `run.sh` will result in the following output (after 3 rounds):
+
+```shell
+INFO flwr 2024-01-17 17:45:23,794 | app.py:228 | app_fit: metrics_distributed {
+    'accuracy': [(1, 0.10000000149011612), (2, 0.10000000149011612), (3, 0.3393000066280365)], 
+    'acc': [(1, 0.1), (2, 0.1), (3, 0.3393)], 
+    'rec': [(1, 0.1), (2, 0.1), (3, 0.3393)], 
+    'prec': [(1, 0.1), (2, 0.1), (3, 0.3393)], 
+    'f1': [(1, 0.10000000000000002), (2, 0.10000000000000002), (3, 0.3393)]
+}
+```
diff --git a/examples/custom-metrics/client.py b/examples/custom-metrics/client.py
index c1def5435801..518d953d4c38 100644
--- a/examples/custom-metrics/client.py
+++ b/examples/custom-metrics/client.py
@@ -4,16 +4,27 @@
 import numpy as np
 import tensorflow as tf
 from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
+from flwr_datasets import FederatedDataset
 
 
 # Make TensorFlow log less verbose
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
 
-# Load model and data (MobileNetV2, CIFAR-10)
+# Load model (MobileNetV2)
 model = tf.keras.applications.MobileNetV2((32, 32, 3), classes=10, weights=None)
 model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
-(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
+
+# Load data with Flower Datasets (CIFAR-10)
+fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
+train = fds.load_full("train")
+test = fds.load_full("test")
+
+# using Numpy format
+train_np = train.with_format("numpy")
+test_np = test.with_format("numpy")
+x_train, y_train = train_np["img"], train_np["label"]
+x_test, y_test = test_np["img"], test_np["label"]
 
 
 # method for extra learning metrics calculation
diff --git a/examples/custom-metrics/pyproject.toml b/examples/custom-metrics/pyproject.toml
index 906035628564..8a2da6562018 100644
--- a/examples/custom-metrics/pyproject.toml
+++ b/examples/custom-metrics/pyproject.toml
@@ -12,8 +12,8 @@ authors = [
 ]
 
 [tool.poetry.dependencies]
-python = "^3.8"
+python = ">=3.8,<3.11"
 flwr = ">=1.0,<2.0"
+flwr-datasets = { version = "*", extras = ["vision"] }
 scikit-learn = "^1.2.2"
-tensorflow-cpu = {version = "^2.9.1, !=2.11.1", markers="platform_machine == 'x86_64'"}
-tensorflow-macos = {version = "^2.9.1, !=2.11.1", markers="sys_platform == 'darwin' and platform_machine == 'arm64'"}
+tensorflow = "==2.12.0"
\ No newline at end of file
diff --git a/examples/custom-metrics/requirements.txt b/examples/custom-metrics/requirements.txt
index daa7de82570b..69d867c5f287 100644
--- a/examples/custom-metrics/requirements.txt
+++ b/examples/custom-metrics/requirements.txt
@@ -1,3 +1,4 @@
-flwr>=1.0, <2.0
-scikit-learn==1.2.2
+flwr>=1.0,<2.0
+flwr-datasets[vision]
+scikit-learn>=1.2.2
 tensorflow==2.12.0

From 56e0b367c96173bc45d69003333208d5f622e754 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:18:53 +0100
Subject: [PATCH 10/18] Uppercase comment

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/client.py b/examples/custom-metrics/client.py
index 518d953d4c38..071d1409a943 100644
--- a/examples/custom-metrics/client.py
+++ b/examples/custom-metrics/client.py
@@ -20,7 +20,7 @@
 train = fds.load_full("train")
 test = fds.load_full("test")
 
-# using Numpy format
+# Using Numpy format
 train_np = train.with_format("numpy")
 test_np = test.with_format("numpy")
 x_train, y_train = train_np["img"], train_np["label"]

From d3eebe7f84d829bcca601007419a0e860538a574 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:19:13 +0100
Subject: [PATCH 11/18] Uppercase comment

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/client.py b/examples/custom-metrics/client.py
index 071d1409a943..b2206118ed44 100644
--- a/examples/custom-metrics/client.py
+++ b/examples/custom-metrics/client.py
@@ -27,7 +27,7 @@
 x_test, y_test = test_np["img"], test_np["label"]
 
 
-# method for extra learning metrics calculation
+# Method for extra learning metrics calculation
 def eval_learning(y_test, y_pred):
     acc = accuracy_score(y_test, y_pred)
     rec = recall_score(

From 290ac5a9a04843141215e219769f9963975e8bd2 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:19:53 +0100
Subject: [PATCH 12/18] Add comment

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/server.py b/examples/custom-metrics/server.py
index d888223a05ca..9cec0fae9852 100644
--- a/examples/custom-metrics/server.py
+++ b/examples/custom-metrics/server.py
@@ -1,7 +1,7 @@
 import flwr as fl
 import numpy as np
 
-
+# Define metrics aggregation function
 def average_metrics(metrics):
     accuracies_tf = np.mean([metric["accuracy"] for _, metric in metrics])
     accuracies = np.mean([metric["acc"] for _, metric in metrics])

From 8d1c38038339af41242960d06209a881042752a4 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:20:32 +0100
Subject: [PATCH 13/18] Add comment about waiting for server.py

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
index b67b9ca88abd..e38c9f87435a 100644
--- a/examples/custom-metrics/README.md
+++ b/examples/custom-metrics/README.md
@@ -78,6 +78,7 @@ Alternatively you can run all of it in one shell as follows:
 
 ```shell
 python server.py &
+# Wait for a few seconds to give the server enough time to start, then:
 python client.py &
 python client.py
 ```

From 4b0978d6ac6e35499b0992b93337697e88b01d06 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:21:08 +0100
Subject: [PATCH 14/18] Add comment about strategy definition

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/server.py b/examples/custom-metrics/server.py
index 9cec0fae9852..1da7d2e06695 100644
--- a/examples/custom-metrics/server.py
+++ b/examples/custom-metrics/server.py
@@ -17,7 +17,7 @@ def average_metrics(metrics):
         "f1": f1s,
     }
 
-
+# Define strategy
 strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=average_metrics)
 
 

From 5d9319b4d8c14b1d1e0af2f2a9b3172a5c5fd849 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:21:37 +0100
Subject: [PATCH 15/18] Fix typos

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
index e38c9f87435a..03773127e277 100644
--- a/examples/custom-metrics/README.md
+++ b/examples/custom-metrics/README.md
@@ -1,6 +1,6 @@
 # Flower Example using Custom Metrics
 
-This simple example demonstrate how to calculate custom metrics over multiple clients beyond the traditional ones available in the ML frameworks. In this case, it demonstrate the use of ready-available scikit-learn metrics: accuracy, recall, precision, and f1-score.
+This simple example demonstrates how to calculate custom metrics over multiple clients beyond the traditional ones available in the ML frameworks. In this case, it demonstrates the use of readily available `scikit-learn` metrics: accuracy, recall, precision, and f1-score.
 
 Once both the test values (`y_test`) and the predictions (`y_pred`) are available on the client side (`client.py`), other metrics or custom ones are possible to be calculated.
 

From 8e29ba81624f03fd62546dc6facc1b2bd2e75393 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:22:00 +0100
Subject: [PATCH 16/18] Fix typo

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
index 03773127e277..4c01ccd9970f 100644
--- a/examples/custom-metrics/README.md
+++ b/examples/custom-metrics/README.md
@@ -9,7 +9,7 @@ The main takeaways of this implementation are:
 - the use of the `output_dict` on the client side - inside `evaluate` method on `client.py`
 - the use of the `evaluate_metrics_aggregation_fn` - to aggregate the metrics on the server side, part of the `strategy` on `server.py`
 
-This example is based on the `quickstart_tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html), with the addition of [Flower Datasets](https://flower.dev/docs/datasets/index.html) to retrieve the CIFAR-10.
+This example is based on the `quickstart-tensorflow` with CIFAR-10, source [here](https://flower.dev/docs/quickstart-tensorflow.html), with the addition of [Flower Datasets](https://flower.dev/docs/datasets/index.html) to retrieve the CIFAR-10.
 
 Using the CIFAR-10 dataset for classification, this is a multi-class classification problem, thus some changes on how to calculate the metrics using `average='micro'` and `np.argmax` is required. For binary classification, this is not required. Also, for unsupervised learning tasks, such as using a deep autoencoder, a custom metric based on reconstruction error could be implemented on client side.
 

From 01e3ba22e879c95133059753d803ac577422547a Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 12:23:22 +0100
Subject: [PATCH 17/18] Add missing reference to run.sh

Co-authored-by: Yan Gao <y.gaogy@gmail.com>
---
 examples/custom-metrics/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/custom-metrics/README.md b/examples/custom-metrics/README.md
index 4c01ccd9970f..debcd7919839 100644
--- a/examples/custom-metrics/README.md
+++ b/examples/custom-metrics/README.md
@@ -28,6 +28,7 @@ This will create a new directory called `custom-metrics` containing the followin
 -- requirements.txt
 -- client.py
 -- server.py
+-- run.sh
 -- README.md
 ```
 

From e5ecd6b483d3151bf1e3e50739fde2f072ee57a3 Mon Sep 17 00:00:00 2001
From: Gustavo Bertoli <gubertoli@gmail.com>
Date: Thu, 18 Jan 2024 13:27:57 +0100
Subject: [PATCH 18/18] Improving docstring about mean average and about
 weighted average

---
 examples/custom-metrics/server.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/examples/custom-metrics/server.py b/examples/custom-metrics/server.py
index 1da7d2e06695..f8420bf51f16 100644
--- a/examples/custom-metrics/server.py
+++ b/examples/custom-metrics/server.py
@@ -1,8 +1,36 @@
 import flwr as fl
 import numpy as np
 
+
 # Define metrics aggregation function
 def average_metrics(metrics):
+    """Aggregate metrics from multiple clients by calculating mean averages.
+
+    Parameters:
+    - metrics (list): A list containing tuples, where each tuple represents metrics for a client.
+                    Each tuple is structured as (num_examples, metric), where:
+                    - num_examples (int): The number of examples used to compute the metrics.
+                    - metric (dict): A dictionary containing custom metrics provided as `output_dict`
+                                    in the `evaluate` method from `client.py`.
+
+    Returns:
+    A dictionary with the aggregated metrics, calculating mean averages. The keys of the
+    dictionary represent different metrics, including:
+    - 'accuracy': Mean accuracy calculated by TensorFlow.
+    - 'acc': Mean accuracy from scikit-learn.
+    - 'rec': Mean recall from scikit-learn.
+    - 'prec': Mean precision from scikit-learn.
+    - 'f1': Mean F1 score from scikit-learn.
+
+    Note: If a weighted average is required, the `num_examples` value of each tuple can be used as the weight.
+
+    Example:
+        Example `metrics` list for two clients after the last round:
+        [(10000, {'prec': 0.108, 'acc': 0.108, 'f1': 0.108, 'accuracy': 0.1080000028014183, 'rec': 0.108}),
+        (10000, {'f1': 0.108, 'rec': 0.108, 'accuracy': 0.1080000028014183, 'prec': 0.108, 'acc': 0.108})]
+    """
+
+    # num_examples is discarded (bound to `_`), so each client contributes equally to the mean
     accuracies_tf = np.mean([metric["accuracy"] for _, metric in metrics])
     accuracies = np.mean([metric["acc"] for _, metric in metrics])
     recalls = np.mean([metric["rec"] for _, metric in metrics])
@@ -17,7 +45,8 @@ def average_metrics(metrics):
         "f1": f1s,
     }
 
-# Define strategy
+
+# Define strategy, passing the custom aggregation function for the evaluation metrics
 strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=average_metrics)