Merge branch 'main' into fds-docs-how-to
danieljanes authored Sep 20, 2023
2 parents 50c6cb6 + 051bf1e commit b78aaa6
Showing 12 changed files with 507 additions and 23 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/datasets.yml
@@ -0,0 +1,42 @@
name: Datasets

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

defaults:
  run:
    working-directory: datasets

jobs:
  test_core:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # Latest version which comes cached in the host image can be found here:
        # https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#python
        # In case of a mismatch, the job has to download Python to install it.
        # Note: Due to a bug in actions/setup-python we have to put 3.10 in
        # quotes as it will otherwise assume 3.1
        python: [3.8, 3.9, '3.10']

    name: Python ${{ matrix.python }}

    steps:
      - uses: actions/checkout@v4
      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: ${{ matrix.python }}
      - name: Install dependencies (mandatory only)
        run: python -m poetry install --all-extras
      - name: Test (formatting + unit tests)
        run: ./dev/test.sh
11 changes: 9 additions & 2 deletions baselines/fedprox/README.md
@@ -59,6 +59,13 @@ The following table shows the main hyperparameters for this baseline with their
To construct the Python environment, simply run:

```bash
# Set directory to use python 3.10 (install with `pyenv install <version>` if you don't have it)
pyenv local 3.10.12

# Tell poetry to use python3.10
poetry env use 3.10.12

# Install
poetry install
```

@@ -97,6 +104,6 @@ python -m fedprox.main --multirun mu=0.0,2.0 stragglers_fraction=0.0,0.5,0.9 '+r
python -m fedprox.main --config-name fedavg --multirun stragglers_fraction=0.0,0.5,0.9 '+repeat_num=range(5)'
```

-The above commands would generate results that you can plot and would look like:
+The above commands would generate results that you can plot and would look like the plot shown below. This plot was generated using the jupyter notebook in the `docs/` directory of this baseline after running the `--multirun` commands above.

-![](docs/FedProx_mnist.png)
+![](_static/FedProx_mnist.png)
Binary file modified baselines/fedprox/_static/FedProx_mnist.png
357 changes: 357 additions & 0 deletions baselines/fedprox/docs/viz_and_plot_results.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions baselines/fedprox/fedprox/models.py
@@ -55,8 +55,9 @@ class LogisticRegression(nn.Module):
As described in the Li et al., 2020 paper :
-[Federated Optimization in Heterogeneous Networks]
-(https://arxiv.org/pdf/1812.06127.pdf)
+[Federated Optimization in Heterogeneous Networks] (
+https://arxiv.org/pdf/1812.06127.pdf)
"""

def __init__(self, num_classes: int) -> None:
@@ -153,7 +154,7 @@ def _train_one_epoch( # pylint: disable=too-many-arguments
        optimizer.zero_grad()
        proximal_term = 0.0
        for local_weights, global_weights in zip(net.parameters(), global_params):
-            proximal_term += (local_weights - global_weights).norm(2)
+            proximal_term += torch.square((local_weights - global_weights).norm(2))
        loss = criterion(net(images), labels) + (proximal_mu / 2) * proximal_term
        loss.backward()
        optimizer.step()
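For context, the change above aligns the code with the FedProx objective from Li et al., 2020, which penalizes local drift with (μ/2)·‖w − w_global‖²: the L2 norm returned by `.norm(2)` must be squared before scaling. A minimal sketch of the corrected term, using hypothetical random tensors in place of real model parameters:

```python
import torch

# Hypothetical stand-ins for net.parameters() and the global model weights
local_params = [torch.randn(4, 4), torch.randn(4)]
global_params = [torch.randn(4, 4), torch.randn(4)]
proximal_mu = 1.0

# FedProx proximal term: sum of *squared* L2 distances; .norm(2) alone
# would contribute the un-squared distance and change the objective.
proximal_term = 0.0
for local_weights, global_weights in zip(local_params, global_params):
    proximal_term += torch.square((local_weights - global_weights).norm(2))

penalty = (proximal_mu / 2) * proximal_term  # added to the task loss
print(float(penalty))
```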
2 changes: 2 additions & 0 deletions baselines/fedprox/pyproject.toml
@@ -41,6 +41,8 @@ python = ">=3.10.0, <3.11.0"
flwr = { extras = ["simulation"], version = "1.5.0" }
hydra-core = "1.3.2"
matplotlib = "3.7.1"
+jupyter = "^1.0.0"
+pandas = "^2.0.3"
torch = { url = "https://download.pytorch.org/whl/cu117/torch-2.0.1%2Bcu117-cp310-cp310-linux_x86_64.whl"}
torchvision = { url = "https://download.pytorch.org/whl/cu117/torchvision-0.15.2%2Bcu117-cp310-cp310-linux_x86_64.whl"}

4 changes: 4 additions & 0 deletions datasets/dev/test.sh
@@ -2,6 +2,10 @@
set -e
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../

+# Append path to PYTHONPATH that makes flwr_tool.init_py_check discoverable
+PARENT_DIR=$(dirname "$(pwd)") # Go one dir up from flower/datasets
+export PYTHONPATH="${PYTHONPATH}:${PARENT_DIR}/src/py"

echo "=== test.sh ==="

echo "- Start Python checks"
18 changes: 3 additions & 15 deletions datasets/doc/source/ref-api-flwr-datasets.rst
@@ -1,39 +1,27 @@
flwr\_datasets (Python API reference)
======================

-federated\_dataset
-------------------
-
-.. automodule:: flwr_datasets.federated_dataset

Federated Dataset
-----------------

.. autoclass:: flwr_datasets.federated_dataset.FederatedDataset
   :members:


-partitioner
------------
-
-.. automodule:: flwr_datasets.partitioner
-
-partitioner.partitioner
------------------------
-
-.. automodule:: flwr_datasets.partitioner.partitioner

Partitioner
-----------

-.. autoclass:: flwr_datasets.partitioner.partitioner.Partitioner
+.. autoclass:: flwr_datasets.partitioner.Partitioner
   :members:

-partitioner.iid_partitioner
----------------------------
-
-.. automodule:: flwr_datasets.partitioner.iid_partitioner

IID Partitioner
---------------

-.. autoclass:: flwr_datasets.partitioner.iid_partitioner.IidPartitioner
+.. autoclass:: flwr_datasets.partitioner.IidPartitioner
   :members:
72 changes: 72 additions & 0 deletions datasets/doc/source/tutorial-quickstart.rst
@@ -0,0 +1,72 @@
Quickstart
==========

Get up and running with Flower Datasets as quickly as possible by learning only the essentials.

Install Federated Datasets
--------------------------
Run on the command line::

python -m pip install "flwr-datasets[vision]"

Install the ML framework
------------------------
TensorFlow::

pip install tensorflow

PyTorch::

pip install torch torchvision

Choose the dataset
------------------
Choose the dataset by going to Hugging Face `Datasets Hub <https://huggingface.co/datasets>`_ and searching for your
dataset by name. Note that the name is case sensitive, so make sure to pass the correct name as the `dataset` parameter
to `FederatedDataset`.
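
For example, CIFAR-10 is published on the Hub under the lowercase identifier ``cifar10`` (an illustrative check; verify the exact string on the dataset's Hub page)::

    from flwr_datasets import FederatedDataset

    # "cifar10" matches the Hub name exactly; "CIFAR10" would not resolve
    fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})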

Partition the dataset
---------------------
::

    from flwr_datasets import FederatedDataset

    fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
    partition = fds.load_partition(0, "train")
    centralized_dataset = fds.load_full("test")

Now you're ready to go. You have ten partitions created from the train split of the CIFAR-10 dataset and the test split
for centralized evaluation. Next, we convert the dataset from Hugging Face's Dataset type to the one
supported by your framework.

Conversion
----------
For more detailed instructions, go to :doc:`how-to`.

PyTorch DataLoader
^^^^^^^^^^^^^^^^^^
Transform the Dataset directly into the DataLoader::

    from torch.utils.data import DataLoader
    from torchvision.transforms import ToTensor

    transforms = ToTensor()
    partition_torch = partition.map(
        lambda img: {"img": transforms(img)}, input_columns="img"
    ).with_format("torch")
    dataloader = DataLoader(partition_torch, batch_size=64)
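
You can then iterate over the batches as usual. A small sketch, assuming the ``img`` and ``label`` columns of the CIFAR-10 example above::

    for batch in dataloader:
        images, labels = batch["img"], batch["label"]
        print(images.shape, labels.shape)  # e.g. torch.Size([64, 3, 32, 32])
        break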

NumPy
^^^^^
NumPy arrays can be used as input to a TensorFlow model, and the conversion is straightforward::

    partition_np = partition.with_format("numpy")
    X_train, y_train = partition_np["img"], partition_np["label"]
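
These arrays can be passed straight to ``model.fit``. A minimal sketch, assuming a toy Keras classifier for CIFAR-10 (the architecture is illustrative, not part of Flower Datasets)::

    import tensorflow as tf

    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(32, 32, 3)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(X_train, y_train, epochs=1, batch_size=64)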

TensorFlow Dataset
^^^^^^^^^^^^^^^^^^
Transformation to TensorFlow Dataset is a one-liner::

    tf_dataset = partition.to_tf_dataset(columns="img", label_cols="label", batch_size=64,
                                         shuffle=True)
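
The resulting ``tf.data.Dataset`` yields ``(features, labels)`` batches, so it can be passed directly to ``model.fit`` (reusing the illustrative model from the NumPy sketch above)::

    model.fit(tf_dataset, epochs=1)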

13 changes: 12 additions & 1 deletion datasets/flwr_datasets/partitioner/iid_partitioner.py
@@ -35,7 +35,18 @@ def __init__(self, num_partitions: int) -> None:
        self._num_partitions = num_partitions

    def load_partition(self, idx: int) -> datasets.Dataset:
-        """Load a single IID partition based on the partition index."""
+        """Load a single IID partition based on the partition index.
+
+        Parameters
+        ----------
+        idx: int
+            the index that corresponds to the requested partition
+
+        Returns
+        -------
+        dataset_partition: Dataset
+            single dataset partition
+        """
        return self.dataset.shard(
            num_shards=self._num_partitions, index=idx, contiguous=True
        )
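
For illustration, a minimal sketch of driving the partitioner by hand (normally `FederatedDataset` wires this up for you; the sketch assumes the `Partitioner` base class exposes a `dataset` setter):

```python
from datasets import load_dataset
from flwr_datasets.partitioner import IidPartitioner

partitioner = IidPartitioner(num_partitions=10)
# Assumption: the dataset is attached via the base-class property
partitioner.dataset = load_dataset("mnist", split="train")
partition = partitioner.load_partition(idx=0)
print(len(partition))  # roughly 1/10 of the train split
```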
2 changes: 1 addition & 1 deletion doc/source/ref-changelog.md
@@ -14,7 +14,7 @@

- **Update Flower Baselines**

-- FedProx ([#2286](https://github.com/adap/flower/pull/2286))
+- FedProx ([#2210](https://github.com/adap/flower/pull/2210), [#2286](https://github.com/adap/flower/pull/2286))

- **General updates to baselines** ([#2301](https://github.com/adap/flower/pull/2301), [#2305](https://github.com/adap/flower/pull/2305), [#2307](https://github.com/adap/flower/pull/2307), [#2327](https://github.com/adap/flower/pull/2327))

@@ -15,7 +15,7 @@
"\n",
"> [Star Flower on GitHub](https://github.com/adap/flower) ⭐️ and join the open-source Flower community on Slack to connect, ask questions, and get help: [Join Slack](https://flower.dev/join-slack) 🌼 We'd love to hear from you in the `#introductions` channel! And if anything is unclear, head over to the `#questions` channel.\n",
"\n",
"Let's get stated!"
"Let's get started!"
]
},
{
