diff --git a/examples/sklearn-logreg-mnist/README.md b/examples/sklearn-logreg-mnist/README.md index 79ed63a64233..ee3cdfc9768e 100644 --- a/examples/sklearn-logreg-mnist/README.md +++ b/examples/sklearn-logreg-mnist/README.md @@ -1,7 +1,7 @@ # Flower Example using scikit-learn This example of Flower uses `scikit-learn`'s `LogisticRegression` model to train a federated learning system. It will help you understand how to adapt Flower for use with `scikit-learn`. -Running this example in itself is quite easy. +Running this example in itself is quite easy. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to download, partition and preprocess the MNIST dataset. ## Project Setup @@ -57,18 +57,24 @@ Afterwards you are ready to start the Flower server as well as the clients. You poetry run python3 server.py ``` -Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each: +Now you are ready to start the Flower clients, which will participate in the learning. To do so, simply open two or more terminals and start one client in each, as shown below: + +Start client 1 in the first terminal: ```shell -poetry run python3 client.py +python3 client.py --node-id 0 # or any integer from 0 to 9 ``` -Alternatively you can run all of it in one shell as follows: +Start client 2 in the second terminal: ```shell -poetry run python3 server.py & -poetry run python3 client.py & -poetry run python3 client.py +python3 client.py --node-id 1 # or any integer from 0 to 9 +``` + +Alternatively, you can run all of it in one shell as follows: + +```bash +bash run.sh ``` You will see that Flower is starting a federated training. 
diff --git a/examples/sklearn-logreg-mnist/client.py b/examples/sklearn-logreg-mnist/client.py index dbf0f2f462a7..a5fcaba87409 100644 --- a/examples/sklearn-logreg-mnist/client.py +++ b/examples/sklearn-logreg-mnist/client.py @@ -1,19 +1,35 @@ +import argparse import warnings -import flwr as fl -import numpy as np from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss +import flwr as fl import utils +from flwr_datasets import FederatedDataset if __name__ == "__main__": - # Load MNIST dataset from https://www.openml.org/d/554 - (X_train, y_train), (X_test, y_test) = utils.load_mnist() + N_CLIENTS = 10 + + parser = argparse.ArgumentParser(description="Flower") + parser.add_argument( + "--node-id", + type=int, + choices=range(0, N_CLIENTS), + required=True, + help="Specifies the artificial data partition", + ) + args = parser.parse_args() + partition_id = args.node_id + + # Load the partition data + fds = FederatedDataset(dataset="mnist", partitioners={"train": N_CLIENTS}) - # Split train set into 10 partitions and randomly use one for training. 
- partition_id = np.random.choice(10) - (X_train, y_train) = utils.partition(X_train, y_train, 10)[partition_id] + dataset = fds.load_partition(partition_id, "train").with_format("numpy") + X, y = dataset["image"].reshape((len(dataset), -1)), dataset["label"] + # Split the on edge data: 80% train, 20% test + X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :] + y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :] # Create LogisticRegression Model model = LogisticRegression( diff --git a/examples/sklearn-logreg-mnist/pyproject.toml b/examples/sklearn-logreg-mnist/pyproject.toml index 7c13b3f3d492..8ea49fe187a2 100644 --- a/examples/sklearn-logreg-mnist/pyproject.toml +++ b/examples/sklearn-logreg-mnist/pyproject.toml @@ -13,7 +13,7 @@ authors = [ [tool.poetry.dependencies] python = "^3.8" -flwr = "^1.0.0" +flwr = ">=1.0,<2.0" # flwr = { path = "../../", develop = true } # Development +flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" } scikit-learn = "^1.1.1" -openml = "^0.12.2" diff --git a/examples/sklearn-logreg-mnist/requirements.txt b/examples/sklearn-logreg-mnist/requirements.txt index eec2e1a3c4bd..50da9ace3630 100644 --- a/examples/sklearn-logreg-mnist/requirements.txt +++ b/examples/sklearn-logreg-mnist/requirements.txt @@ -1,4 +1,4 @@ -flwr~=1.4.0 +flwr>=1.0, <2.0 +flwr-datasets[vision]>=0.0.2, <1.0.0 numpy~=1.21.1 -openml~=0.13.1 scikit_learn~=1.2.2 diff --git a/examples/sklearn-logreg-mnist/run.sh b/examples/sklearn-logreg-mnist/run.sh index c64f362086aa..48cee1b41b74 100755 --- a/examples/sklearn-logreg-mnist/run.sh +++ b/examples/sklearn-logreg-mnist/run.sh @@ -1,15 +1,17 @@ #!/bin/bash +set -e +cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/ echo "Starting server" python server.py & sleep 3 # Sleep for 3s to give the server enough time to start -for i in `seq 0 1`; do +for i in $(seq 0 1); do echo "Starting client $i" - python client.py & + python client.py --node-id "${i}" & done # This 
will allow you to use CTRL+C to stop all background processes -trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM +trap 'trap - SIGTERM && kill -- -$$' SIGINT SIGTERM # Wait for all background processes to complete wait diff --git a/examples/sklearn-logreg-mnist/server.py b/examples/sklearn-logreg-mnist/server.py index 77e7a89dd668..8541100c3a26 100644 --- a/examples/sklearn-logreg-mnist/server.py +++ b/examples/sklearn-logreg-mnist/server.py @@ -4,6 +4,8 @@ from sklearn.linear_model import LogisticRegression from typing import Dict +from flwr_datasets import FederatedDataset + def fit_round(server_round: int) -> Dict: """Send round number to client.""" @@ -14,7 +16,9 @@ def get_evaluate_fn(model: LogisticRegression): """Return an evaluation function for server-side evaluation.""" # Load test data here to avoid the overhead of doing it in `evaluate` itself - _, (X_test, y_test) = utils.load_mnist() + fds = FederatedDataset(dataset="mnist", partitioners={"train": 10}) + dataset = fds.load_full("test").with_format("numpy") + X_test, y_test = dataset["image"].reshape((len(dataset), -1)), dataset["label"] # The `evaluate` function will be called after every round def evaluate(server_round, parameters: fl.common.NDArrays, config): diff --git a/examples/sklearn-logreg-mnist/utils.py b/examples/sklearn-logreg-mnist/utils.py index 6a6d6c12ac73..b279a0d1a4b3 100644 --- a/examples/sklearn-logreg-mnist/utils.py +++ b/examples/sklearn-logreg-mnist/utils.py @@ -1,16 +1,11 @@ -from typing import Tuple, Union, List import numpy as np from sklearn.linear_model import LogisticRegression -import openml -XY = Tuple[np.ndarray, np.ndarray] -Dataset = Tuple[XY, XY] -LogRegParams = Union[XY, Tuple[np.ndarray]] -XYList = List[XY] +from flwr.common import NDArrays -def get_model_parameters(model: LogisticRegression) -> LogRegParams: - """Returns the paramters of a sklearn LogisticRegression model.""" +def get_model_parameters(model: LogisticRegression) -> NDArrays: + """Returns the 
parameters of a sklearn LogisticRegression model.""" if model.fit_intercept: params = [ model.coef_, @@ -23,9 +18,7 @@ def get_model_parameters(model: LogisticRegression) -> LogRegParams: return params -def set_model_params( - model: LogisticRegression, params: LogRegParams -) -> LogisticRegression: +def set_model_params(model: LogisticRegression, params: NDArrays) -> LogisticRegression: """Sets the parameters of a sklean LogisticRegression model.""" model.coef_ = params[0] if model.fit_intercept: @@ -47,32 +40,3 @@ def set_initial_params(model: LogisticRegression): model.coef_ = np.zeros((n_classes, n_features)) if model.fit_intercept: model.intercept_ = np.zeros((n_classes,)) - - -def load_mnist() -> Dataset: - """Loads the MNIST dataset using OpenML. - - OpenML dataset link: https://www.openml.org/d/554 - """ - mnist_openml = openml.datasets.get_dataset(554) - Xy, _, _, _ = mnist_openml.get_data(dataset_format="array") - X = Xy[:, :-1] # the last column contains labels - y = Xy[:, -1] - # First 60000 samples consist of the train set - x_train, y_train = X[:60000], y[:60000] - x_test, y_test = X[60000:], y[60000:] - return (x_train, y_train), (x_test, y_test) - - -def shuffle(X: np.ndarray, y: np.ndarray) -> XY: - """Shuffle X and y.""" - rng = np.random.default_rng() - idx = rng.permutation(len(X)) - return X[idx], y[idx] - - -def partition(X: np.ndarray, y: np.ndarray, num_partitions: int) -> XYList: - """Split X and y into a number of partitions.""" - return list( - zip(np.array_split(X, num_partitions), np.array_split(y, num_partitions)) - )