From 2f31ef288e6f4fab975375b0df2e03de2cd16474 Mon Sep 17 00:00:00 2001
From: Adam Narozniak <adam@flower.dev>
Date: Tue, 16 Jan 2024 12:49:26 +0100
Subject: [PATCH] Improve documentation

---
 .../partitioner/dirichlet_partitioner.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
index de53b0928af6..ab118ba27a80 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
@@ -26,11 +26,20 @@
 class DirichletPartitioner(Partitioner):  # pylint: disable=R0902
     """Partitioner based on Dirichlet distribution.
 
-    The balancing (not mentioned in paper but implemented in the code) is controlled by
-    `self_balancing` parameter.
-
     Implementation based on Bayesian Nonparametric Federated Learning of Neural Networks
-    https://arxiv.org/abs/1905.12022
+    https://arxiv.org/abs/1905.12022.
+
+    The algorithm divides the data sequentially, one label at a time. For each
+    label, the fractions assigned to the partitions are drawn from a Dirichlet
+    distribution (and adjusted when balancing is enabled), and the samples are
+    assigned accordingly. If `min_partition_size` is not satisfied, the whole
+    procedure is rerun (the fractions change because the draw is random, even
+    though alpha stays the same).
+
+    The balancing is introduced here explicitly (it is not mentioned in the
+    paper but is implemented in its code). It stops assigning new samples to a
+    node once that node holds more samples than it would under an even data
+    distribution. It is controlled by the `self_balancing` parameter.
 
     Parameters
     ----------
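
For reviewers, a minimal sketch of the partitioning scheme the new docstring describes: per-label fractions drawn from a Dirichlet distribution, the self-balancing rule that skips already overfull partitions, and a rerun when `min_partition_size` is not met. The helper `dirichlet_partition` and its signature are hypothetical and only illustrate the idea; this is not the Flower Datasets implementation.

import numpy as np


def dirichlet_partition(
    labels, num_partitions, alpha, min_partition_size, self_balancing=True, seed=0
):
    """Sketch of per-label Dirichlet partitioning with optional self-balancing."""
    rng = np.random.default_rng(seed)
    labels = np.asarray(labels)
    # Average partition size under a perfectly even split (the balancing threshold).
    avg_size = len(labels) / num_partitions
    while True:  # rerun the whole procedure until min_partition_size is satisfied
        partitions = [[] for _ in range(num_partitions)]
        for label in np.unique(labels):
            label_idx = rng.permutation(np.flatnonzero(labels == label))
            # Fractions of this label's samples going to each partition.
            fractions = rng.dirichlet([alpha] * num_partitions)
            if self_balancing:
                # Stop assigning to partitions that already exceed the even-split average.
                overfull = np.array([len(p) > avg_size for p in partitions])
                if not overfull.all():
                    fractions[overfull] = 0.0
                    fractions = fractions / fractions.sum()
            # Turn cumulative fractions into split points over the shuffled indices.
            split_points = (np.cumsum(fractions)[:-1] * len(label_idx)).astype(int)
            for pid, chunk in enumerate(np.split(label_idx, split_points)):
                partitions[pid].extend(chunk.tolist())
        if min(len(p) for p in partitions) >= min_partition_size:
            return partitions

For example, dirichlet_partition(labels, num_partitions=10, alpha=0.5, min_partition_size=10) returns one list of sample indices per partition; smaller alpha values produce more heterogeneous label distributions across partitions.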