From de023a19319156214cc87bce5305b4e3c9c928ba Mon Sep 17 00:00:00 2001
From: Javier
Date: Mon, 13 Nov 2023 13:49:39 +0000
Subject: [PATCH 1/6] Update README.md (#2592)

---
 examples/whisper-federated-finetuning/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/examples/whisper-federated-finetuning/README.md b/examples/whisper-federated-finetuning/README.md
index 712cf0e88369..e89a09519fed 100644
--- a/examples/whisper-federated-finetuning/README.md
+++ b/examples/whisper-federated-finetuning/README.md
@@ -140,10 +140,7 @@ python sim.py # append --num_gpus=0 if you don't have GPUs on your system
 
 # Once finished centralised evaluation loss/acc metrics will be shown
 
-INFO flwr 2023-11-08 14:03:57,557 | app.py:229 | app_fit: metrics_centralized {'val_accuracy': [(0, 0.03977158885994791),
-(1, 0.6940492887196954), (2, 0.5969745541975556), (3, 0.8794830695251452), (4, 0.9021238228811861), (5, 0.8943097575636145),
-(6, 0.9047285113203767), (7, 0.9330795431777199), (8, 0.9446002805049089), (9, 0.9556201162091765)],
-'test_accuracy': [(10, 0.9719836400817996)]}
+INFO flwr 2023-11-08 14:03:57,557 | app.py:229 | app_fit: metrics_centralized {'val_accuracy': [(0, 0.03977158885994791), (1, 0.6940492887196954), (2, 0.5969745541975556), (3, 0.8794830695251452), (4, 0.9021238228811861), (5, 0.8943097575636145), (6, 0.9047285113203767), (7, 0.9330795431777199), (8, 0.9446002805049089), (9, 0.9556201162091765)], 'test_accuracy': [(10, 0.9719836400817996)]}
 ```
 
 ![Global validation accuracy FL with Whisper model](_static/whisper_flower_acc.png)

From c19a8d61d17cf8874bf924bf807d3dedc31aeb32 Mon Sep 17 00:00:00 2001
From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com>
Date: Tue, 14 Nov 2023 09:38:17 +0100
Subject: [PATCH 2/6] Add Scikit Learn integration tests with FDS (#2387)

---
 datasets/e2e/scikit-learn/pyproject.toml  | 15 ++++
 datasets/e2e/scikit-learn/sklearn_test.py | 94 +++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 datasets/e2e/scikit-learn/pyproject.toml
 create mode 100644 datasets/e2e/scikit-learn/sklearn_test.py

diff --git a/datasets/e2e/scikit-learn/pyproject.toml b/datasets/e2e/scikit-learn/pyproject.toml
new file mode 100644
index 000000000000..7e22644566cf
--- /dev/null
+++ b/datasets/e2e/scikit-learn/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["poetry-core>=1.4.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "fds-e2e-sklearn"
+version = "0.1.0"
+description = "Flower Datasets with scikit-learn"
+authors = ["The Flower Authors "]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+flwr-datasets = { path = "./../../", extras = ["vision"] }
+scikit-learn = "^1.2.0"
+parameterized = "==0.9.0"
diff --git a/datasets/e2e/scikit-learn/sklearn_test.py b/datasets/e2e/scikit-learn/sklearn_test.py
new file mode 100644
index 000000000000..e5e6d347ee37
--- /dev/null
+++ b/datasets/e2e/scikit-learn/sklearn_test.py
@@ -0,0 +1,94 @@
+import unittest
+
+import numpy as np
+from parameterized import parameterized_class
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+
+from flwr_datasets import FederatedDataset
+
+
+# Using parameterized testing, two different sets of preprocessing:
+# 1. Without scaling.
+# 2. With standard scaling.
+@parameterized_class(
+    [
+        {"dataset_name": "mnist", "preprocessing": None},
+        {"dataset_name": "mnist", "preprocessing": StandardScaler()},
+    ]
+)
+class FdsWithSKLearn(unittest.TestCase):
+    """Test Flower Datasets with Scikit-learn's Logistic Regression."""
+
+    dataset_name = ""
+    preprocessing = None
+
+    def _get_partition_data(self):
+        """Retrieve partition data."""
+        partition_id = 0
+        fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 10})
+        partition = fds.load_partition(partition_id, "train")
+        partition.set_format("numpy")
+        partition_train_test = partition.train_test_split(test_size=0.2)
+        X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
+            "train"]["label"]
+        X_test, y_test = partition_train_test["test"]["image"], partition_train_test[
+            "test"]["label"]
+        X_train = X_train.reshape(-1, 28 * 28)
+        X_test = X_test.reshape(-1, 28 * 28)
+        if self.preprocessing:
+            self.preprocessing.fit(X_train)
+            X_train = self.preprocessing.transform(X_train)
+            X_test = self.preprocessing.transform(X_test)
+
+        return X_train, X_test, y_train, y_test
+
+    def test_data_shape(self):
+        """Test if the data shape is maintained after preprocessing."""
+        X_train, _, _, _ = self._get_partition_data()
+        self.assertEqual(X_train.shape, (4_800, 28 * 28))
+
+    def test_X_train_type(self):
+        """Test if the data type is correct."""
+        X_train, _, _, _ = self._get_partition_data()
+        self.assertIsInstance(X_train, np.ndarray)
+
+    def test_y_train_type(self):
+        """Test if the data type is correct."""
+        _, _, y_train, _ = self._get_partition_data()
+        self.assertIsInstance(y_train, np.ndarray)
+
+    def test_X_test_type(self):
+        """Test if the data type is correct."""
+        _, X_test, _, _ = self._get_partition_data()
+        self.assertIsInstance(X_test, np.ndarray)
+
+    def test_y_test_type(self):
+        """Test if the data type is correct."""
+        _, _, _, y_test = self._get_partition_data()
+        self.assertIsInstance(y_test, np.ndarray)
+
+    def test_train_classifier(self):
+        """Test if the classifier trains without errors."""
+        X_train, X_test, y_train, y_test = self._get_partition_data()
+        try:
+            clf = LogisticRegression()
+            clf.fit(X_train, y_train)
+        except Exception as e:
+            self.fail(f"Fitting Logistic Regression raised {type(e)} unexpectedly!")
+
+    def test_predict_from_classifier(self):
+        """Test if the classifier predicts without errors."""
+        X_train, X_test, y_train, y_test = self._get_partition_data()
+        clf = LogisticRegression()
+        clf.fit(X_train, y_train)
+        try:
+            _ = clf.predict(X_test)
+        except Exception as e:
+            self.fail(
+                f"Predicting using Logistic Regression model raised {type(e)} "
+                f"unexpectedly!")
+
+
+if __name__ == '__main__':
+    unittest.main()

From 4f3bb4fcb4c1691c987d4e4b3cc3916c5d39cc7a Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 11:33:20 +0100
Subject: [PATCH 3/6] Fixing deprecated-baselines CI (#2594)

---
 baselines/flwr_baselines/pyproject.toml   | 2 ++
 baselines/flwr_baselines/requirements.txt | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/baselines/flwr_baselines/pyproject.toml b/baselines/flwr_baselines/pyproject.toml
index 774d15e73c58..f5fb64744ff8 100644
--- a/baselines/flwr_baselines/pyproject.toml
+++ b/baselines/flwr_baselines/pyproject.toml
@@ -48,6 +48,7 @@ matplotlib = "^3.5.1"
 scikit-image = "^0.18.1"
 scikit-learn = "^1.2.1"
 wget = "^3.2"
+virtualenv = "^20.24.6"
 pandas = "^1.5.3"
 pyhamcrest = "^2.0.4"
 
@@ -61,6 +62,7 @@ flake8 = "==3.9.2"
 pytest = "==6.2.4"
 pytest-watch = "==4.2.0"
 types-requests = "==2.27.7"
+pydantic = "==2.4.2"
 
 [tool.isort]
 line_length = 88
diff --git a/baselines/flwr_baselines/requirements.txt b/baselines/flwr_baselines/requirements.txt
index 1dbb10a75bc2..7b90b8a9bf1f 100644
--- a/baselines/flwr_baselines/requirements.txt
+++ b/baselines/flwr_baselines/requirements.txt
@@ -15,6 +15,7 @@ matplotlib >= 3.5.0
 scikit-image >= 0.18.1
 scikit-learn >= 0.24.2
 wget >= 3.2
+virtualenv >= 20.24.6
 
 ##### dev-dependencies
 isort == 5.11.5
@@ -26,3 +27,4 @@ flake8 == 3.9.2
 pytest == 6.2.4
 pytest-watch == 4.2.0
 types-requests == 2.27.7
+pydantic ==2.4.2

From 04347b296dfbb520e3c713f4d40bbb622a93c0aa Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 19:47:55 +0100
Subject: [PATCH 4/6] Delete node locally in gRPC-rere (#2596)

---
 src/py/flwr/client/grpc_rere_client/connection.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/py/flwr/client/grpc_rere_client/connection.py b/src/py/flwr/client/grpc_rere_client/connection.py
index 3dcc147e8eca..b69228826e13 100644
--- a/src/py/flwr/client/grpc_rere_client/connection.py
+++ b/src/py/flwr/client/grpc_rere_client/connection.py
@@ -136,6 +136,8 @@ def delete_node() -> None:
         delete_node_request = DeleteNodeRequest(node=node)
         stub.DeleteNode(request=delete_node_request)
 
+        del node_store[KEY_NODE]
+
     def receive() -> Optional[TaskIns]:
         """Receive next task from server."""
         # Get Node

From e2116b051ff852b340e2a7913a969e551f020145 Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 19:52:52 +0100
Subject: [PATCH 5/6] C++ SDK: Delete local node for gRPC-rere (#2597)

---
 src/cc/flwr/src/grpc_rere.cc | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/cc/flwr/src/grpc_rere.cc b/src/cc/flwr/src/grpc_rere.cc
index c4920e986b1b..267874a7a0e2 100644
--- a/src/cc/flwr/src/grpc_rere.cc
+++ b/src/cc/flwr/src/grpc_rere.cc
@@ -19,6 +19,14 @@ std::optional get_node_from_store() {
   return node->second;
 }
 
+void delete_node_from_store() {
+  std::lock_guard lock(node_store_mutex);
+  auto node = node_store.find(KEY_NODE);
+  if (node == node_store.end() || !node->second.has_value()) {
+    node_store.erase(node);
+  }
+}
+
 std::optional get_current_task_ins() {
   std::lock_guard state_lock(state_mutex);
   auto current_task_ins = state.find(KEY_TASK_INS);
@@ -80,8 +88,7 @@ void delete_node(const std::unique_ptr &stub) {
     delete_node_request.release_node(); // Release if status is ok
   }
 
-  // TODO: Check if Node needs to be removed from local map
-  // node_store.erase(node);
+  delete_node_from_store();
 }
 
 std::optional

From db38b94d09d0d77e96fe99ecbe57db2a9999738a Mon Sep 17 00:00:00 2001
From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com>
Date: Wed, 15 Nov 2023 10:46:14 +0100
Subject: [PATCH 6/6] Change the settings for IidPartitioner (#2589)

---
 datasets/flwr_datasets/partitioner/iid_partitioner.py |  2 +-
 .../flwr_datasets/partitioner/iid_partitioner_test.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner.py b/datasets/flwr_datasets/partitioner/iid_partitioner.py
index 37b97468cadf..c8dbf8294fec 100644
--- a/datasets/flwr_datasets/partitioner/iid_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/iid_partitioner.py
@@ -48,5 +48,5 @@ def load_partition(self, idx: int) -> datasets.Dataset:
             single dataset partition
         """
         return self.dataset.shard(
-            num_shards=self._num_partitions, index=idx, contiguous=False
+            num_shards=self._num_partitions, index=idx, contiguous=True
         )
diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
index 5f851807f4bd..64c37c4e7127 100644
--- a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
@@ -18,7 +18,6 @@
 import unittest
 from typing import Tuple
 
-import numpy as np
 from parameterized import parameterized
 
 from datasets import Dataset
@@ -102,14 +101,15 @@ def test_load_partition_correct_data(
     ) -> None:
         """Test if the data in partition is equal to the expected."""
        dataset, partitioner = _dummy_setup(num_partitions, num_rows)
+        partition_size = num_rows // num_partitions
         partition_index = 2
         partition = partitioner.load_partition(partition_index)
         row_id = 0
         self.assertEqual(
-            partition["features"][row_id],
-            dataset[np.arange(partition_index, len(dataset), num_partitions)][
-                "features"
-            ][row_id],
+            partition[row_id]["features"],
+            # Note it's contiguous so partition_size * partition_index gets the first
+            # element of the partition of partition_index
+            dataset[partition_size * partition_index + row_id]["features"],
         )
 
     @parameterized.expand(  # type: ignore