From de023a19319156214cc87bce5305b4e3c9c928ba Mon Sep 17 00:00:00 2001
From: Javier
Date: Mon, 13 Nov 2023 13:49:39 +0000
Subject: [PATCH 1/6] Update README.md (#2592)

---
 examples/whisper-federated-finetuning/README.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/examples/whisper-federated-finetuning/README.md b/examples/whisper-federated-finetuning/README.md
index 712cf0e88369..e89a09519fed 100644
--- a/examples/whisper-federated-finetuning/README.md
+++ b/examples/whisper-federated-finetuning/README.md
@@ -140,10 +140,7 @@ python sim.py # append --num_gpus=0 if you don't have GPUs on your system
 
 # Once finished centralised evaluation loss/acc metrics will be shown
 
-INFO flwr 2023-11-08 14:03:57,557 | app.py:229 | app_fit: metrics_centralized {'val_accuracy': [(0, 0.03977158885994791),
-(1, 0.6940492887196954), (2, 0.5969745541975556), (3, 0.8794830695251452), (4, 0.9021238228811861), (5, 0.8943097575636145),
-(6, 0.9047285113203767), (7, 0.9330795431777199), (8, 0.9446002805049089), (9, 0.9556201162091765)],
-'test_accuracy': [(10, 0.9719836400817996)]}
+INFO flwr 2023-11-08 14:03:57,557 | app.py:229 | app_fit: metrics_centralized {'val_accuracy': [(0, 0.03977158885994791), (1, 0.6940492887196954), (2, 0.5969745541975556), (3, 0.8794830695251452), (4, 0.9021238228811861), (5, 0.8943097575636145), (6, 0.9047285113203767), (7, 0.9330795431777199), (8, 0.9446002805049089), (9, 0.9556201162091765)], 'test_accuracy': [(10, 0.9719836400817996)]}
 ```
 
 ![Global validation accuracy FL with Whisper model](_static/whisper_flower_acc.png)

From c19a8d61d17cf8874bf924bf807d3dedc31aeb32 Mon Sep 17 00:00:00 2001
From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com>
Date: Tue, 14 Nov 2023 09:38:17 +0100
Subject: [PATCH 2/6] Add Scikit Learn integration tests with FDS (#2387)

---
 datasets/e2e/scikit-learn/pyproject.toml  | 15 ++++
 datasets/e2e/scikit-learn/sklearn_test.py | 94 +++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 datasets/e2e/scikit-learn/pyproject.toml
 create mode 100644 datasets/e2e/scikit-learn/sklearn_test.py

diff --git a/datasets/e2e/scikit-learn/pyproject.toml b/datasets/e2e/scikit-learn/pyproject.toml
new file mode 100644
index 000000000000..7e22644566cf
--- /dev/null
+++ b/datasets/e2e/scikit-learn/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["poetry-core>=1.4.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "fds-e2e-sklearn"
+version = "0.1.0"
+description = "Flower Datasets with scikit-learn"
+authors = ["The Flower Authors "]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+flwr-datasets = { path = "./../../", extras = ["vision"] }
+scikit-learn = "^1.2.0"
+parameterized = "==0.9.0"
diff --git a/datasets/e2e/scikit-learn/sklearn_test.py b/datasets/e2e/scikit-learn/sklearn_test.py
new file mode 100644
index 000000000000..e5e6d347ee37
--- /dev/null
+++ b/datasets/e2e/scikit-learn/sklearn_test.py
@@ -0,0 +1,94 @@
+import unittest
+
+import numpy as np
+from parameterized import parameterized_class
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+
+from flwr_datasets import FederatedDataset
+
+
+# Using parameterized testing, two different sets of preprocessing:
+# 1. Without scaling.
+# 2. With standard scaling.
+@parameterized_class(
+    [
+        {"dataset_name": "mnist", "preprocessing": None},
+        {"dataset_name": "mnist", "preprocessing": StandardScaler()},
+    ]
+)
+class FdsWithSKLearn(unittest.TestCase):
+    """Test Flower Datasets with Scikit-learn's Logistic Regression."""
+
+    dataset_name = ""
+    preprocessing = None
+
+    def _get_partition_data(self):
+        """Retrieve partition data."""
+        partition_id = 0
+        fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 10})
+        partition = fds.load_partition(partition_id, "train")
+        partition.set_format("numpy")
+        partition_train_test = partition.train_test_split(test_size=0.2)
+        X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
+            "train"]["label"]
+        X_test, y_test = partition_train_test["test"]["image"], partition_train_test[
+            "test"]["label"]
+        X_train = X_train.reshape(-1, 28 * 28)
+        X_test = X_test.reshape(-1, 28 * 28)
+        if self.preprocessing:
+            self.preprocessing.fit(X_train)
+            X_train = self.preprocessing.transform(X_train)
+            X_test = self.preprocessing.transform(X_test)
+
+        return X_train, X_test, y_train, y_test
+
+    def test_data_shape(self):
+        """Test if the data shape is maintained after preprocessing."""
+        X_train, _, _, _ = self._get_partition_data()
+        self.assertEqual(X_train.shape, (4_800, 28 * 28))
+
+    def test_X_train_type(self):
+        """Test if the data type is correct."""
+        X_train, _, _, _ = self._get_partition_data()
+        self.assertIsInstance(X_train, np.ndarray)
+
+    def test_y_train_type(self):
+        """Test if the data type is correct."""
+        _, _, y_train, _ = self._get_partition_data()
+        self.assertIsInstance(y_train, np.ndarray)
+
+    def test_X_test_type(self):
+        """Test if the data type is correct."""
+        _, X_test, _, _ = self._get_partition_data()
+        self.assertIsInstance(X_test, np.ndarray)
+
+    def test_y_test_type(self):
+        """Test if the data type is correct."""
+        _, _, _, y_test = self._get_partition_data()
+        self.assertIsInstance(y_test, np.ndarray)
+
+    def test_train_classifier(self):
+        """Test if the classifier trains without errors."""
+        X_train, X_test, y_train, y_test = self._get_partition_data()
+        try:
+            clf = LogisticRegression()
+            clf.fit(X_train, y_train)
+        except Exception as e:
+            self.fail(f"Fitting Logistic Regression raised {type(e)} unexpectedly!")
+
+    def test_predict_from_classifier(self):
+        """Test if the classifier predicts without errors."""
+        X_train, X_test, y_train, y_test = self._get_partition_data()
+        clf = LogisticRegression()
+        clf.fit(X_train, y_train)
+        try:
+            _ = clf.predict(X_test)
+        except Exception as e:
+            self.fail(
+                f"Predicting using Logistic Regression model raised {type(e)} "
+                f"unexpectedly!")
+
+
+if __name__ == '__main__':
+    unittest.main()

From 4f3bb4fcb4c1691c987d4e4b3cc3916c5d39cc7a Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 11:33:20 +0100
Subject: [PATCH 3/6] Fixing deprecated-baselines CI (#2594)

---
 baselines/flwr_baselines/pyproject.toml   | 2 ++
 baselines/flwr_baselines/requirements.txt | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/baselines/flwr_baselines/pyproject.toml b/baselines/flwr_baselines/pyproject.toml
index 774d15e73c58..f5fb64744ff8 100644
--- a/baselines/flwr_baselines/pyproject.toml
+++ b/baselines/flwr_baselines/pyproject.toml
@@ -48,6 +48,7 @@ matplotlib = "^3.5.1"
 scikit-image = "^0.18.1"
 scikit-learn = "^1.2.1"
 wget = "^3.2"
+virtualenv = "^20.24.6"
 pandas = "^1.5.3"
 pyhamcrest = "^2.0.4"
 
@@ -61,6 +62,7 @@ flake8 = "==3.9.2"
 pytest = "==6.2.4"
 pytest-watch = "==4.2.0"
 types-requests = "==2.27.7"
+pydantic = "==2.4.2"
 
 [tool.isort]
 line_length = 88
diff --git a/baselines/flwr_baselines/requirements.txt b/baselines/flwr_baselines/requirements.txt
index 1dbb10a75bc2..7b90b8a9bf1f 100644
--- a/baselines/flwr_baselines/requirements.txt
+++ b/baselines/flwr_baselines/requirements.txt
@@ -15,6 +15,7 @@ matplotlib >= 3.5.0
 scikit-image >= 0.18.1
 scikit-learn >= 0.24.2
 wget >= 3.2
+virtualenv >= 20.24.6
 
 ##### dev-dependencies
 isort == 5.11.5
@@ -26,3 +27,4 @@ flake8 == 3.9.2
 pytest == 6.2.4
 pytest-watch == 4.2.0
 types-requests == 2.27.7
+pydantic ==2.4.2

From 04347b296dfbb520e3c713f4d40bbb622a93c0aa Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 19:47:55 +0100
Subject: [PATCH 4/6] Delete node locally in gRPC-rere (#2596)

---
 src/py/flwr/client/grpc_rere_client/connection.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/py/flwr/client/grpc_rere_client/connection.py b/src/py/flwr/client/grpc_rere_client/connection.py
index 3dcc147e8eca..b69228826e13 100644
--- a/src/py/flwr/client/grpc_rere_client/connection.py
+++ b/src/py/flwr/client/grpc_rere_client/connection.py
@@ -136,6 +136,8 @@ def delete_node() -> None:
         delete_node_request = DeleteNodeRequest(node=node)
         stub.DeleteNode(request=delete_node_request)
 
+        del node_store[KEY_NODE]
+
     def receive() -> Optional[TaskIns]:
         """Receive next task from server."""
         # Get Node

From e2116b051ff852b340e2a7913a969e551f020145 Mon Sep 17 00:00:00 2001
From: Charles Beauville
Date: Tue, 14 Nov 2023 19:52:52 +0100
Subject: [PATCH 5/6] C++ SDK: Delete local node for gRPC-rere (#2597)

---
 src/cc/flwr/src/grpc_rere.cc | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/cc/flwr/src/grpc_rere.cc b/src/cc/flwr/src/grpc_rere.cc
index c4920e986b1b..267874a7a0e2 100644
--- a/src/cc/flwr/src/grpc_rere.cc
+++ b/src/cc/flwr/src/grpc_rere.cc
@@ -19,6 +19,14 @@ std::optional get_node_from_store() {
   return node->second;
 }
 
+void delete_node_from_store() {
+  std::lock_guard lock(node_store_mutex);
+  auto node = node_store.find(KEY_NODE);
+  if (node == node_store.end() || !node->second.has_value()) {
+    node_store.erase(node);
+  }
+}
+
 std::optional get_current_task_ins() {
   std::lock_guard state_lock(state_mutex);
   auto current_task_ins = state.find(KEY_TASK_INS);
@@ -80,8 +88,7 @@ void delete_node(const std::unique_ptr &stub) {
     delete_node_request.release_node(); // Release if status is ok
   }
 
-  // TODO: Check if Node needs to be removed from local map
-  // node_store.erase(node);
+  delete_node_from_store();
 }
 
 std::optional

From db38b94d09d0d77e96fe99ecbe57db2a9999738a Mon Sep 17 00:00:00 2001
From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com>
Date: Wed, 15 Nov 2023 10:46:14 +0100
Subject: [PATCH 6/6] Change the settings for IidPartitioner (#2589)

---
 datasets/flwr_datasets/partitioner/iid_partitioner.py |  2 +-
 .../flwr_datasets/partitioner/iid_partitioner_test.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner.py b/datasets/flwr_datasets/partitioner/iid_partitioner.py
index 37b97468cadf..c8dbf8294fec 100644
--- a/datasets/flwr_datasets/partitioner/iid_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/iid_partitioner.py
@@ -48,5 +48,5 @@ def load_partition(self, idx: int) -> datasets.Dataset:
             single dataset partition
         """
         return self.dataset.shard(
-            num_shards=self._num_partitions, index=idx, contiguous=False
+            num_shards=self._num_partitions, index=idx, contiguous=True
         )
diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
index 5f851807f4bd..64c37c4e7127 100644
--- a/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/iid_partitioner_test.py
@@ -18,7 +18,6 @@
 import unittest
 from typing import Tuple
 
-import numpy as np
 from parameterized import parameterized
 
 from datasets import Dataset
@@ -102,14 +101,15 @@ def test_load_partition_correct_data(
     ) -> None:
         """Test if the data in partition is equal to the expected."""
        dataset, partitioner = _dummy_setup(num_partitions, num_rows)
+        partition_size = num_rows // num_partitions
         partition_index = 2
         partition = partitioner.load_partition(partition_index)
         row_id = 0
         self.assertEqual(
-            partition["features"][row_id],
-            dataset[np.arange(partition_index, len(dataset), num_partitions)][
-                "features"
-            ][row_id],
+            partition[row_id]["features"],
+            # Note it's contiguous so partition_size * partition_index gets the first
+            # element of the partition of partition_index
+            dataset[partition_size * partition_index + row_id]["features"],
         )
 
     @parameterized.expand(  # type: ignore