From 6a72406b3e91765f0f24c0eb0ef8a2e1c2818792 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Thu, 18 Jan 2024 23:05:09 +0100 Subject: [PATCH] Update pandas quickstart to use FDS (#2816) Co-authored-by: jafermarq --- examples/quickstart-pandas/README.md | 7 ++-- examples/quickstart-pandas/client.py | 46 ++++++++++++++++----- examples/quickstart-pandas/pyproject.toml | 2 +- examples/quickstart-pandas/requirements.txt | 2 +- examples/quickstart-pandas/run.sh | 6 +-- examples/quickstart-pandas/server.py | 11 +---- 6 files changed, 43 insertions(+), 31 deletions(-) diff --git a/examples/quickstart-pandas/README.md b/examples/quickstart-pandas/README.md index 2defc468c2ef..a25e6ea6ee36 100644 --- a/examples/quickstart-pandas/README.md +++ b/examples/quickstart-pandas/README.md @@ -1,6 +1,7 @@ # Flower Example using Pandas -This introductory example to Flower uses Pandas, but deep knowledge of Pandas is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case. +This introductory example to Flower uses Pandas, but deep knowledge of Pandas is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to +download, partition and preprocess the dataset. Running this example in itself is quite easy. ## Project Setup @@ -69,13 +70,13 @@ Now you are ready to start the Flower clients which will participate in the lear Start client 1 in the first terminal: ```shell -$ python3 client.py +$ python3 client.py --node-id 0 ``` Start client 2 in the second terminal: ```shell -$ python3 client.py +$ python3 client.py --node-id 1 ``` You will see that the server is printing aggregated statistics about the dataset distributed amongst clients. Have a look to the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-pandas.html) for a detailed explanation. diff --git a/examples/quickstart-pandas/client.py b/examples/quickstart-pandas/client.py index 3feab3f6a0f4..c2f2605594d5 100644 --- a/examples/quickstart-pandas/client.py +++ b/examples/quickstart-pandas/client.py @@ -1,4 +1,4 @@ -import warnings +import argparse from typing import Dict, List, Tuple import numpy as np @@ -6,10 +6,10 @@ import flwr as fl +from flwr_datasets import FederatedDataset -df = pd.read_csv("./data/client.csv") -column_names = ["sepal length (cm)", "sepal width (cm)"] +column_names = ["sepal_length", "sepal_width"] def compute_hist(df: pd.DataFrame, col_name: str) -> np.ndarray: @@ -19,23 +19,47 @@ def compute_hist(df: pd.DataFrame, col_name: str) -> np.ndarray: # Define Flower client class FlowerClient(fl.client.NumPyClient): + def __init__(self, X: pd.DataFrame): + self.X = X + def fit( self, parameters: List[np.ndarray], config: Dict[str, str] ) -> Tuple[List[np.ndarray], int, Dict]: hist_list = [] # Execute query locally - for c in column_names: - hist = compute_hist(df, c) + for c in self.X.columns: + hist = compute_hist(self.X, c) hist_list.append(hist) return ( hist_list, - len(df), + len(self.X), {}, ) -# Start Flower client -fl.client.start_numpy_client( - server_address="127.0.0.1:8080", - client=FlowerClient(), -) +if __name__ == "__main__": + N_CLIENTS = 2 + + parser = argparse.ArgumentParser(description="Flower") + parser.add_argument( + "--node-id", + type=int, + choices=range(0, N_CLIENTS), + required=True, + help="Specifies the node id of artificially partitioned datasets.", + ) + args = parser.parse_args() + partition_id = args.node_id + + # Load the partition data + fds = FederatedDataset(dataset="hitorilabs/iris", partitioners={"train": N_CLIENTS}) + + dataset = fds.load_partition(partition_id, "train").with_format("pandas")[:] + # Use just the specified columns + X = dataset[column_names] + + # Start Flower client + fl.client.start_numpy_client( + server_address="127.0.0.1:8080", + client=FlowerClient(X), + ) diff --git a/examples/quickstart-pandas/pyproject.toml b/examples/quickstart-pandas/pyproject.toml index de20eaf61d63..6229210d6488 100644 --- a/examples/quickstart-pandas/pyproject.toml +++ b/examples/quickstart-pandas/pyproject.toml @@ -12,6 +12,6 @@ maintainers = ["The Flower Authors "] [tool.poetry.dependencies] python = ">=3.8,<3.11" flwr = ">=1.0,<2.0" +flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" } numpy = "1.23.2" pandas = "2.0.0" -scikit-learn = "1.3.1" diff --git a/examples/quickstart-pandas/requirements.txt b/examples/quickstart-pandas/requirements.txt index 14308a55faaf..d44a3c6adab9 100644 --- a/examples/quickstart-pandas/requirements.txt +++ b/examples/quickstart-pandas/requirements.txt @@ -1,4 +1,4 @@ flwr>=1.0, <2.0 +flwr-datasets[vision]>=0.0.2, <1.0.0 numpy==1.23.2 pandas==2.0.0 -scikit-learn==1.3.1 diff --git a/examples/quickstart-pandas/run.sh b/examples/quickstart-pandas/run.sh index 6b85ce30bf45..571fa8bfb3e4 100755 --- a/examples/quickstart-pandas/run.sh +++ b/examples/quickstart-pandas/run.sh @@ -2,13 +2,9 @@ echo "Starting server" python server.py & sleep 3 # Sleep for 3s to give the server enough time to start -# Download data -mkdir -p ./data -python -c "from sklearn.datasets import load_iris; load_iris(as_frame=True)['data'].to_csv('./data/client.csv')" - for i in `seq 0 1`; do echo "Starting client $i" - python client.py & + python client.py --node-id ${i} & done # This will allow you to use CTRL+C to stop all background processes diff --git a/examples/quickstart-pandas/server.py b/examples/quickstart-pandas/server.py index c82304374836..af4c2a796788 100644 --- a/examples/quickstart-pandas/server.py +++ b/examples/quickstart-pandas/server.py @@ -1,5 +1,4 @@ -import pickle -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as np @@ -9,9 +8,6 @@ EvaluateRes, FitIns, FitRes, - Metrics, - MetricsAggregationFn, - NDArrays, Parameters, Scalar, ndarrays_to_parameters, @@ -23,11 +19,6 @@ class FedAnalytics(Strategy): - def __init__( - self, compute_fns: List[Callable] = None, col_names: List[str] = None - ) -> None: - super().__init__() - def initialize_parameters( self, client_manager: Optional[ClientManager] = None ) -> Optional[Parameters]: