From 2f31ef288e6f4fab975375b0df2e03de2cd16474 Mon Sep 17 00:00:00 2001
From: Adam Narozniak <adam@flower.dev>
Date: Tue, 16 Jan 2024 12:49:26 +0100
Subject: [PATCH] Improve documentation

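A minimal usage sketch for the partitioner whose docstring is improved below.
The `alpha`, `min_partition_size`, and `self_balancing` parameters come from the
docstring itself; the remaining names (`num_partitions`, `partition_by`,
`FederatedDataset`, `load_partition`, the "mnist" dataset) are assumptions and
may differ from the actual API:

    from flwr_datasets import FederatedDataset
    from flwr_datasets.partitioner import DirichletPartitioner

    # NOTE: argument names other than alpha, min_partition_size, and
    # self_balancing are assumed here, not taken from the patch.
    # Partition MNIST across 10 nodes with per-label fractions drawn from a
    # Dirichlet distribution (lower alpha -> more heterogeneous partitions).
    partitioner = DirichletPartitioner(
        num_partitions=10,
        partition_by="label",
        alpha=0.5,
        min_partition_size=10,
        self_balancing=True,
    )
    fds = FederatedDataset(dataset="mnist", partitioners={"train": partitioner})
    partition = fds.load_partition(0, "train")
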
---
 .../partitioner/dirichlet_partitioner.py        | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
index de53b0928af6..ab118ba27a80 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py
@@ -26,11 +26,20 @@
 class DirichletPartitioner(Partitioner):  # pylint: disable=R0902
     """Partitioner based on Dirichlet distribution.
 
-    The balancing (not mentioned in paper but implemented in the code) is controlled by
-    `self_balancing` parameter.
-
     Implementation based on Bayesian Nonparametric Federated Learning of Neural Networks
-    https://arxiv.org/abs/1905.12022
+    https://arxiv.org/abs/1905.12022.
+
+    The algorithm sequentially divides the data of each label. The fractions of the
+    data with a given label are drawn from a Dirichlet distribution and adjusted if
+    balancing is enabled; the samples are then assigned according to these fractions.
+    If `min_partition_size` is not satisfied, the algorithm is rerun (the fractions
+    change each run since the sampling is random, even though alpha stays the same).
+
+    The notion of balancing is explicitly introduced here (not mentioned in the paper
+    but implemented in the code). It is a mechanism that stops a node from receiving
+    new samples once its current number of samples exceeds the average number it
+    would hold under an even data distribution. It is controlled by the
+    `self_balancing` parameter.
 
     Parameters
     ----------