Merge branch 'main' into simulation_examples_with_flower_datasets

adap · Dec 5, 2023 · 16ec712 · 16ec712
2 parents b3b4f46 + c3347a4
commit 16ec712
Show file tree

Hide file tree

Showing 15 changed files with 80 additions and 68 deletions.
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -24,8 +24,8 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=/commandhisto
     && echo $SNIPPET >> "/home/$USERNAME/.bashrc"
 
 # Install system dependencies
-RUN apt update 
-RUN apt install -y curl wget gnupg python3 python-is-python3 python3-pip git \
+RUN apt-get update
+RUN apt-get install -y curl wget gnupg python3 python-is-python3 python3-pip git \
     build-essential tmux vim
 
 RUN python -m pip install \

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -1,11 +1,15 @@
 {
   "dockerFile": "Dockerfile",
-  "postCreateCommand": "poetry install --extras \"simulation\"",
-  "extensions": ["ms-python.python"],
-  "settings": {
-    "files.watcherExclude": {},
-    "search.exclude": {},
-    "terminal.integrated.defaultProfile.linux": "bash"
+  "postCreateCommand": "sudo poetry install --extras \"simulation\"",
+  "customizations": {
+    "vscode": {
+      "settings": {
+        "files.watcherExclude": { },
+        "search.exclude": { },
+        "terminal.integrated.defaultProfile.linux": "bash"
+      },
+      "extensions": [ "ms-python.python" ]
+    }
   },
   "remoteUser": "flwr-vscode",
   "containerEnv": {

diff --git a/datasets/doc/source/how-to-use-with-tensorflow.rst b/datasets/doc/source/how-to-use-with-tensorflow.rst
@@ -25,7 +25,7 @@ In case of CIFAR10, you should see the following output.
   'label': ClassLabel(names=['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
   'frog', 'horse', 'ship', 'truck'], id=None)}
 
-We will use the keys in the partition features in order to construct a `tf.data.Dataset <https://www.tensorflow.org/api_docs/python/tf/data/Dataset>_`. Let's move to the transformations.
+We will use the keys in the partition features in order to construct a `tf.data.Dataset <https://www.tensorflow.org/api_docs/python/tf/data/Dataset>`_. Let's move to the transformations.
 
 NumPy
 -----

diff --git a/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst b/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst
@@ -1,4 +1,4 @@
-Develop in VSCode Dev Containers 
+Develop in VSCode Dev Containers
 ================================
 
 When working on the Flower framework we want to ensure that all contributors use the same developer environment to format code or run tests. For this purpose we are using the VSCode Remote Containers extension. What is it? Read the following quote:
@@ -14,7 +14,7 @@ Source: `Official VSCode documentation <https://code.visualstudio.com/docs/remot
 Getting started
 ---------------
 
-Configuring and setting up the :code:`Dockerfile` as well the configuration for the devcontainer can be a bit more involved. The good thing is you want have to do it. Usually it should be enough to install Docker on your system and ensure its available on your command line. Additionally, install the `VSCode Containers Extension <vscode:extension/ms-vscode-remote.remote-containers>`_.
+Configuring and setting up the :code:`Dockerfile` as well as the configuration for the devcontainer can be a bit more involved. The good thing is you don't have to do it. Usually it should be enough to install `Docker <https://docs.docker.com/engine/install/>`_ on your system and ensure it's available on your command line. Additionally, install the `VSCode Containers Extension <vscode:extension/ms-vscode-remote.remote-containers>`_.
 
 Now you should be good to go. When starting VSCode, it will ask you to run in the container environment and - if you confirm - automatically build the container and use it. To manually instruct VSCode to use the devcontainer, you can, after installing the extension, click the green area in the bottom left corner of your VSCode window and select the option *(Re)Open Folder in Container*.
 

diff --git a/doc/source/contributor-how-to-write-documentation.rst b/doc/source/contributor-how-to-write-documentation.rst
@@ -7,7 +7,7 @@ Project layout
 
 The Flower documentation lives in the ``doc`` directory. The Sphinx-based documentation system supports both reStructuredText (``.rst`` files) and Markdown (``.md`` files).
 
-Note that, in order to build the documentation locally (with ``poetry run make html``, like described below), `Pandoc <https://pandoc.org/installing.html>_` needs to be installed on the system.
+Note that, in order to build the documentation locally (with ``poetry run make html``, like described below), `Pandoc <https://pandoc.org/installing.html>`_ needs to be installed on the system.
 
 
 Edit an existing page

diff --git a/doc/source/contributor-tutorial-get-started-as-a-contributor.rst b/doc/source/contributor-tutorial-get-started-as-a-contributor.rst
@@ -30,8 +30,8 @@ but you can change it by providing a specific :code:`<version>`)::
 
   $ ./dev/venv-create.sh <version>
 
-If you don't have :code:`pyenv` installed, 
-you can use the following script that will install pyenv, 
+If you don't have :code:`pyenv` installed,
+you can use the following script that will install pyenv,
 set it up and create the virtual environment (with :code:`Python 3.8.17` by default)::
 
   $ ./dev/setup-defaults.sh <version>
@@ -83,7 +83,7 @@ Run Github Actions (CI) locally
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Developers could run the full set of Github Actions workflows under their local
-environment by using `Act <https://github.com/nektos/act>_`. Please refer to
+environment by using `Act <https://github.com/nektos/act>`_. Please refer to
 the installation instructions under the linked repository and run the next
 command under Flower main cloned repository folder::
 
@@ -116,6 +116,6 @@ convenience script to re-build the documentation yet, but it's pretty easy::
 
 This will generate HTML documentation in ``doc/build/html``.
 
-Note that, in order to build the documentation locally 
-(with ``poetry run make html``, like described below), 
-`Pandoc <https://pandoc.org/installing.html>_` needs to be installed on the system. 
+Note that, in order to build the documentation locally
+(with ``poetry run make html``, like described below),
+`Pandoc <https://pandoc.org/installing.html>`_ needs to be installed on the system.
diff --git a/doc/source/explanation-differential-privacy.rst b/doc/source/explanation-differential-privacy.rst
@@ -4,7 +4,7 @@ Differential privacy
 Flower provides differential privacy (DP) wrapper classes for the easy integration of the central DP guarantees provided by DP-FedAvg into training pipelines defined in any of the various ML frameworks that Flower is compatible with. 
 
 .. warning::
-  Please note that these components are still experimental, the correct configuration of DP for a specific task is still an unsolved problem.
+  Please note that these components are still experimental; the correct configuration of DP for a specific task is still an unsolved problem.
 
 .. note::
   The name DP-FedAvg is misleading since it can be applied on top of any FL algorithm that conforms to the general structure prescribed by the FedOpt family of algorithms.
@@ -17,18 +17,18 @@ DP-FedAvg, originally proposed by McMahan et al. [mcmahan]_ and extended by Andr
 * **Clipping** : The influence of each client's update is bounded by clipping it. This is achieved by enforcing a cap on the L2 norm of the update, scaling it down if needed.
 * **Noising** :  Gaussian noise, calibrated to the clipping threshold, is added to the average computed at the server.
 
-The distribution of the update norm has been shown to vary from task-to-task and to evolve as training progresses. Therefore, we use an adaptive approach [andrew]_ that continuously adjusts the clipping threshold to track a prespecified quantile of the update norm distribution. 
+The distribution of the update norm has been shown to vary from task-to-task and to evolve as training progresses. This variability is crucial in understanding its impact on differential privacy guarantees, emphasizing the need for an adaptive approach [andrew]_ that continuously adjusts the clipping threshold to track a prespecified quantile of the update norm distribution.
 
 Simplifying Assumptions
 ***********************
 
-We make (and attempt to enforce) a number of assumptions that must be satisfied to ensure that the training process actually realises the :math:`(\epsilon, \delta)` guarantees the user has in mind when configuring the setup. 
+We make (and attempt to enforce) a number of assumptions that must be satisfied to ensure that the training process actually realizes the :math:`(\epsilon, \delta)` guarantees the user has in mind when configuring the setup. 
 
 * **Fixed-size subsampling** : Fixed-size subsamples of the clients must be taken at each round, as opposed to variable-sized Poisson subsamples. 
 * **Unweighted averaging** : The contributions from all the clients must be weighted equally in the aggregate to eliminate the requirement for the server to know in advance the sum of the weights of all clients available for selection.
 * **No client failures** : The set of available clients must stay constant across all rounds of training. In other words, clients cannot drop out or fail. 
 
-The first two are useful for eliminating a multitude of complications associated with calibrating the noise to the clipping threshold while the third one is required to comply with the assumptions of the privacy analysis.
+The first two are useful for eliminating a multitude of complications associated with calibrating the noise to the clipping threshold, while the third one is required to comply with the assumptions of the privacy analysis.
 
 .. note::
    These restrictions are in line with constraints imposed by Andrew et al. [andrew]_.
@@ -48,15 +48,15 @@ Introducing DP to an existing workload can be thought of as adding an extra laye
 Server-side logic
 *****************
 
-The first version of our solution was to define a decorator whose constructor accepted, among other things, a boolean valued variable indicating whether adaptive clipping was to be enabled or not. We quickly realized that this would clutter its :code:`__init__()` function with variables corresponding to hyperparameters of adaptive clipping that would remain unused when it was disabled. A cleaner implementation could be achieved by splitting the functionality into two decorators, :code:`DPFedAvgFixed` and :code:`DPFedAvgAdaptive`, with the latter sub- classing the former. The constructors for both classes accept a boolean parameter :code:`server_side_noising`, which, as the name suggests, determines where noising is to be performed.
+The first version of our solution was to define a decorator whose constructor accepted, among other things, a boolean-valued variable indicating whether adaptive clipping was to be enabled or not. We quickly realized that this would clutter its :code:`__init__()` function with variables corresponding to hyperparameters of adaptive clipping that would remain unused when it was disabled. A cleaner implementation could be achieved by splitting the functionality into two decorators, :code:`DPFedAvgFixed` and :code:`DPFedAvgAdaptive`, with the latter subclassing the former. The constructors for both classes accept a boolean parameter :code:`server_side_noising`, which, as the name suggests, determines where noising is to be performed.
 
 DPFedAvgFixed
 :::::::::::::
 
 The server-side capabilities required for the original version of DP-FedAvg, i.e., the one which performed fixed clipping, can be completely captured with the help of wrapper logic for just the following two methods of the :code:`Strategy` abstract class.
 
 #. :code:`configure_fit()` : The config dictionary being sent by the wrapped :code:`Strategy` to each client needs to be augmented with an additional value equal to the clipping threshold (keyed under :code:`dpfedavg_clip_norm`) and, if :code:`server_side_noising=true`, another one equal to the scale of the Gaussian noise that needs to be added at the client (keyed under :code:`dpfedavg_noise_stddev`). This entails *post*-processing of the results returned by the wrappee's implementation of :code:`configure_fit()`.
-#. :code:`aggregate_fit()`: We check whether any of the sampled clients dropped out or failed to upload an update before the round timed out. In that case, we need to abort the current round, discarding any successful updates that were received, and move on to the next one. On the other hand, if all clients responded successfully, we must force the averaging of the updates to happen in an unweighted manner by intercepting the :code:`parameters` field of :code:`FitRes` for each received update and setting it to 1. Furthermore, if :code:`server_side_noising=true`, each update is perturbed with an amount of noise equal to what it would have been subjected to had client-side noising being enabled.  This entails *pre*-processing of the arguments to this method before passing them on to the wrappee's implementation of :code:`aggregate_fit()`.
+#. :code:`aggregate_fit()`: We check whether any of the sampled clients dropped out or failed to upload an update before the round timed out. In that case, we need to abort the current round, discarding any successful updates that were received, and move on to the next one. On the other hand, if all clients responded successfully, we must force the averaging of the updates to happen in an unweighted manner by intercepting the :code:`parameters` field of :code:`FitRes` for each received update and setting it to 1. Furthermore, if :code:`server_side_noising=true`, each update is perturbed with an amount of noise equal to what it would have been subjected to had client-side noising been enabled. This entails *pre*-processing of the arguments to this method before passing them on to the wrappee's implementation of :code:`aggregate_fit()`.
 
 .. note::
   We can't directly change the aggregation function of the wrapped strategy to force it to add noise to the aggregate, hence we simulate client-side noising to implement server-side noising. 
@@ -95,6 +95,6 @@ Assume you have trained for :math:`n` rounds with sampling fraction :math:`q` an
    rdp = tfp.compute_rdp_sample_without_replacement(q, z, n, orders)
    eps, _, _ = tfp.rdp_accountant.get_privacy_spent(rdp, target_delta=delta)
 
-.. [mcmahan] McMahan, H. Brendan, et al. "Learning differentially private recurrent language models." arXiv preprint arXiv:1710.06963 (2017).
+.. [mcmahan] McMahan et al. "Learning Differentially Private Recurrent Language Models." International Conference on Learning Representations (ICLR), 2017.
 
-.. [andrew] Andrew, Galen, et al. "Differentially private learning with adaptive clipping." Advances in Neural Information Processing Systems 34 (2021): 17455-17466.
+.. [andrew] Andrew, Galen, et al. "Differentially Private Learning with Adaptive Clipping." Advances in Neural Information Processing Systems (NeurIPS), 2021.
diff --git a/examples/vertical-fl/simulation.py b/examples/vertical-fl/simulation.py
@@ -22,4 +22,4 @@ def client_fn(cid):
 
 results_dir = Path("_static/results")
 results_dir.mkdir(exist_ok=True)
-np.save(str(results_dir/"hist.npy"), hist)
+np.save(str(results_dir / "hist.npy"), hist)
diff --git a/examples/xgboost-quickstart/pyproject.toml b/examples/xgboost-quickstart/pyproject.toml
@@ -10,6 +10,6 @@ authors = ["The Flower Authors <[email protected]>"]
 
 [tool.poetry.dependencies]
 python = ">=3.8,<3.11"
-flwr-nightly = ">=1.0,<2.0"
+flwr = ">=1.6.0,<2.0"
 flwr-datasets = ">=0.0.1,<1.0.0"
 xgboost = ">=2.0.0,<3.0.0"
diff --git a/examples/xgboost-quickstart/requirements.txt b/examples/xgboost-quickstart/requirements.txt
@@ -1,3 +1,3 @@
-flwr-nightly>=1.0, <2.0
+flwr>=1.6.0, <2.0
 flwr-datasets>=0.0.1, <1.0.0
 xgboost>=2.0.0, <3.0.0
diff --git a/src/docker/server/Dockerfile b/src/docker/server/Dockerfile
@@ -1,8 +1,20 @@
 # Copyright 2023 Flower Labs GmbH. All Rights Reserved.
 
-FROM ubuntu:22.04 as base
+ARG UBUNTU_VERSION=22.04
+FROM ubuntu:$UBUNTU_VERSION as base
 
 ENV DEBIAN_FRONTEND noninteractive
+# Send stdout and stderr stream directly to the terminal. Ensures that no
+# output is retained in a buffer if the application crashes.
+ENV PYTHONUNBUFFERED 1
+# Typically, bytecode is created on the first invocation to speed up following invocation.
+# However, in Docker we only make a single invocation (when we start the container).
+# Therefore, we can disable bytecode writing.
+ENV PYTHONDONTWRITEBYTECODE 1
+# Ensure that python encoding is always UTF-8.
+ENV PYTHONIOENCODING UTF-8
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
 
 # Install system dependencies
 RUN apt-get update \
@@ -17,32 +29,28 @@ RUN apt-get update \
 ARG PYTHON_VERSION
 ENV PYENV_ROOT /root/.pyenv
 ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+# https://github.com/hadolint/hadolint/wiki/DL4006
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
 RUN pyenv install ${PYTHON_VERSION} \
     && pyenv global ${PYTHON_VERSION} \
     && pyenv rehash
 
 # Install specific version of pip
 ARG PIP_VERSION
-RUN python -m pip install pip==$PIP_VERSION
+RUN python -m pip install --no-cache-dir pip==$PIP_VERSION
 
 # Install specific version of setuptools
 ARG SETUPTOOLS_VERSION
-RUN python -m pip install setuptools==$SETUPTOOLS_VERSION
-
-# Install poetry as all examples use it and therefore it should be available for custom images
-ARG POETRY_VERSION
-RUN curl -sSL https://install.python-poetry.org | python3 - --version ${POETRY_VERSION}
-ENV PATH /root/.local/bin:$PATH
-RUN poetry config virtualenvs.create false
+RUN python -m pip install --no-cache-dir setuptools==$SETUPTOOLS_VERSION
 
 # Server image
 FROM base as server
 
 WORKDIR /app
 ARG FLWR_VERSION
-RUN python -m pip install -U flwr[rest]==${FLWR_VERSION}
-ENTRYPOINT ["python", "-c", "from flwr.server import run_server\nrun_server()"]
+RUN python -m pip install -U --no-cache-dir flwr[rest]==${FLWR_VERSION}
+ENTRYPOINT ["python", "-c", "from flwr.server import run_server; run_server()"]
 
 # Test if Flower can be successfully installed and imported
 FROM server as test

diff --git a/src/py/flwr/client/app.py b/src/py/flwr/client/app.py
@@ -23,8 +23,8 @@
 from typing import Callable, ContextManager, Optional, Tuple, Union
 
 from flwr.client.client import Client
-from flwr.client.flower import Bwd, Flower, Fwd
-from flwr.client.typing import ClientFn
+from flwr.client.flower import Flower
+from flwr.client.typing import Bwd, ClientFn, Fwd
 from flwr.common import GRPC_MAX_MESSAGE_LENGTH, EventType, event
 from flwr.common.address import parse_address
 from flwr.common.constant import (

diff --git a/src/py/flwr/client/flower.py b/src/py/flwr/client/flower.py
@@ -16,32 +16,10 @@
 
 
 import importlib
-from dataclasses import dataclass
-from typing import Callable, cast
+from typing import cast
 
 from flwr.client.message_handler.message_handler import handle
-from flwr.client.typing import ClientFn
-from flwr.client.workload_state import WorkloadState
-from flwr.proto.task_pb2 import TaskIns, TaskRes
-
-
-@dataclass
-class Fwd:
-    """."""
-
-    task_ins: TaskIns
-    state: WorkloadState
-
-
-@dataclass
-class Bwd:
-    """."""
-
-    task_res: TaskRes
-    state: WorkloadState
-
-
-FlowerCallable = Callable[[Fwd], Bwd]
+from flwr.client.typing import Bwd, ClientFn, Fwd
 
 
 class Flower:

diff --git a/src/py/flwr/client/typing.py b/src/py/flwr/client/typing.py
@@ -14,8 +14,30 @@
 # ==============================================================================
 """Custom types for Flower clients."""
 
+from dataclasses import dataclass
 from typing import Callable
 
+from flwr.client.workload_state import WorkloadState
+from flwr.proto.task_pb2 import TaskIns, TaskRes
+
 from .client import Client as Client
 
+
+@dataclass
+class Fwd:
+    """."""
+
+    task_ins: TaskIns
+    state: WorkloadState
+
+
+@dataclass
+class Bwd:
+    """."""
+
+    task_res: TaskRes
+    state: WorkloadState
+
+
+FlowerCallable = Callable[[Fwd], Bwd]
 ClientFn = Callable[[str], Client]
diff --git a/src/py/flwr/flower/__init__.py b/src/py/flwr/flower/__init__.py
@@ -15,9 +15,9 @@
 """Flower callable package."""
 
 
-from flwr.client.flower import Bwd as Bwd
 from flwr.client.flower import Flower as Flower
-from flwr.client.flower import Fwd as Fwd
+from flwr.client.typing import Bwd as Bwd
+from flwr.client.typing import Fwd as Fwd
 
 __all__ = [
     "Flower",