From dd02021739ce7ab51702379790a6780123db62e7 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:40:25 +0200 Subject: [PATCH 01/17] Add size partitioner --- .../partitioner/size_partitioner.py | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 datasets/flwr_datasets/partitioner/size_partitioner.py diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py new file mode 100644 index 000000000000..e471a6836c7e --- /dev/null +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -0,0 +1,135 @@ +# Copyright 2023 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DataQuantityPartitioner class.""" +from typing import Callable, Dict, List, Union + +import numpy as np + +import datasets +from flwr_datasets.partitioner.partitioner import Partitioner + + +class SizePartitioner(Partitioner): + """Base class for the deterministic size partitioning schemes based on the cid. + + The client with cid has the following relationship regarding the number of samples. + cid_to_size_fnc(cid) ~ number of samples for cid + + If the function doesn't transform the cid it's a linear correlations of the number + of sample for cid and the value of cid. 
For instance, if the IDs range from 1 to M, + client with ID 1 gets 1 unit of data, client 2 gets 2 units, and so on, up to client + M which gets M units. + + Note that size corresponding to the cid is deterministic, yet in case of different + dataset shuffling the assignment of samples to cid will vary. + + Parameters + ---------- + num_partitions: int + The total number of partitions that the data will be divided into. + cid_to_size_fnc: Callable + Function that defines the relationship between cid and the number of samples. + """ + + def __init__( + self, + num_partitions: int, + cid_to_size_fnc: Callable, # type: ignore[type-arg] + ) -> None: + super().__init__() + if num_partitions <= 0: + raise ValueError("The number of partitions must be greater than zero.") + self._num_partitions = num_partitions + self._cid_to_size_fnc = cid_to_size_fnc + + self._cid_to_size: Dict[int, int] = {} + self._cid_to_indices: Dict[int, List[int]] = {} + # A flag to perform only a single compute to determine the indices + self._cid_to_indices_determined = False + + def load_partition(self, idx: int) -> datasets.Dataset: + """Load a single partition based on the partition index. + + For this partitioner the number of samples is dependent on the partition idx. + + Parameters + ---------- + idx: int + the index that corresponds to the requested partition + + Returns + ------- + dataset_partition: Dataset + single dataset partition + """ + # The partitioning is done lazily - only when the first partition is requested. + # A single run creates the indices assignments for all the partition indices. 
+ self._determine_cid_to_indices_if_needed() + return self.dataset.select(self._cid_to_indices[idx]) + + @property + def cid_to_size(self) -> Dict[int, int]: + """Cid to the number of samples.""" + return self._cid_to_size + + @property + def cid_to_indices(self) -> Dict[int, List[int]]: + """Cid to the list of indices.""" + return self._cid_to_indices + + def _determine_cid_to_size(self) -> None: + """Determine data quantity associated with partition indices.""" + data_division_in_units = self._cid_to_size_fnc( + np.linspace(start=1, stop=self._num_partitions, num=self._num_partitions) + ) + total_units: Union[int, float] = data_division_in_units.sum() + # Normalize the units to get the fraction total dataset + partition_sizes_as_fraction = data_division_in_units / total_units + # Calculate the number of samples + partition_sizes_as_num_of_samples = np.array( + partition_sizes_as_fraction * len(self.dataset), dtype=np.int64 + ) + # Check if any sample is not allocated because of multiplication with fractions. + assigned_samples = np.sum(partition_sizes_as_num_of_samples) + left_unassigned_samples = len(self.dataset) - assigned_samples + # If there is any sample(s) left unassigned, assign it to the largest partition. 
+ partition_sizes_as_num_of_samples[-1] += left_unassigned_samples + print(partition_sizes_as_num_of_samples) + for idx, partition_size in enumerate(partition_sizes_as_num_of_samples): + self._cid_to_size[idx] = partition_size + + self._check_if_cid_to_size_possible() + + def _determine_cid_to_indices_if_needed(self) -> None: + """Create an assignment of indices to the partition indices..""" + if self._cid_to_indices_determined is True: + return + self._determine_cid_to_size() + total_samples_assigned = 0 + for idx, quantity in self._cid_to_size.items(): + self._cid_to_indices[idx] = list( + range(total_samples_assigned, total_samples_assigned + quantity) + ) + total_samples_assigned += quantity + self._cid_to_indices_determined = True + + def _check_if_cid_to_size_possible(self) -> None: + all_positive = all(value > 0 for value in self.cid_to_size.values()) + if not all_positive: + raise ValueError( + f"The given specification of the parameter num_partitions" + f"={self._num_partitions} for the given dataset results " + f"in the partitions sizes that are not greater than 0." + ) From 60f47fc78615a94e599dbab3ac2c80385efefcf6 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:40:31 +0200 Subject: [PATCH 02/17] Add size partitioner tests --- .../partitioner/size_partitioner_test.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 datasets/flwr_datasets/partitioner/size_partitioner_test.py diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py b/datasets/flwr_datasets/partitioner/size_partitioner_test.py new file mode 100644 index 000000000000..c5344cc01582 --- /dev/null +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -0,0 +1,100 @@ +# Copyright 2023 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""SizePartitioner tests.""" +import unittest + +from parameterized import parameterized + +from datasets import Dataset +from flwr_datasets.partitioner.linear_partitioner import LinearPartitioner + + +def _dummy_dataset(num_rows: int) -> Dataset: + data = { + "features": list(range(num_rows)), + "labels": [i % 2 for i in range(num_rows)], + } + dataset = Dataset.from_dict(data) + return dataset + + +class TestLinearPartitioner(unittest.TestCase): + """Test LinearPartitioner.""" + + @parameterized.expand( # type: ignore + [ + (1, 100), + (10, 100), + (5, 55), # This will leave some undivided samples + ] + ) + def test_linear_distribution(self, num_partitions: int, num_rows: int) -> None: + """Test the linear distribution of samples.""" + dataset = _dummy_dataset(num_rows) + partitioner = LinearPartitioner(num_partitions=num_partitions) + partitioner.dataset = dataset + # Run a single partition loading to trigger the division + _ = partitioner.load_partition(0) + total_samples = sum(partitioner.cid_to_size.values()) + self.assertEqual(total_samples, num_rows) + + # Testing if each partition is getting more than the previous one + last_count = 0 + for i in range(num_partitions): + current_count = partitioner.cid_to_size[i] + self.assertGreaterEqual(current_count, last_count) + last_count = current_count + + @parameterized.expand( # type: ignore + [ + (10, 100), + (5, 55), # This will leave some undivided samples + (7, 77), # This will leave some undivided samples + ] + ) + def 
test_undivided_samples(self, num_partitions: int, num_rows: int) -> None: + """Test the logic for distributing undivided samples.""" + dataset = _dummy_dataset(num_rows) + partitioner = LinearPartitioner(num_partitions=num_partitions) + partitioner.dataset = dataset + # If there are any undivided samples, they should be added to the largest + # partition + last_partition_id = num_partitions - 1 + actual_samples_in_last_partition = len( + partitioner.load_partition(last_partition_id) + ) + expected_samples_in_last_partition = partitioner.cid_to_size[last_partition_id] + self.assertEqual( + expected_samples_in_last_partition, actual_samples_in_last_partition + ) + + def test_meaningless_params(self) -> None: + """Test if the params leading to partition size not greater than zero raises.""" + num_rows = 10 + num_partitions = 100 + dataset = _dummy_dataset(num_rows) + partitioner = LinearPartitioner(num_partitions=num_partitions) + partitioner.dataset = dataset + with self.assertRaises(ValueError) as context: + partitioner.load_partition(1) + self.assertIn( + "The given specification of the parameter num_partitions=10 for the given " + "dataset results in the partitions sizes that are not greater than 0.", + str(context.exception), + ) + + +if __name__ == "__main__": + unittest.main() From c595d2cfdfc4e055777efe3702b08c0c7744e48f Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:40:57 +0200 Subject: [PATCH 03/17] Add linear partitioner --- .../partitioner/linear_partitioner.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 datasets/flwr_datasets/partitioner/linear_partitioner.py diff --git a/datasets/flwr_datasets/partitioner/linear_partitioner.py b/datasets/flwr_datasets/partitioner/linear_partitioner.py new file mode 100644 index 000000000000..a2543c72ab27 --- /dev/null +++ b/datasets/flwr_datasets/partitioner/linear_partitioner.py @@ -0,0 +1,36 @@ +# Copyright 2023 Flower Labs GmbH. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""LinearPartitioner class.""" + +from flwr_datasets.partitioner.size_partitioner import SizePartitioner + + +class LinearPartitioner(SizePartitioner): + """Partitioner creates partitions of size that are linearly correlated with its id. + + The amount of data each client gets is linearly correlated with the client's ID. + For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data, + client 2 gets 2 units, and so on, up to client M which gets M units. + + Parameters + ---------- + num_partitions: int + The total number of partitions that the data will be divided into. 
+ """ + + def __init__(self, num_partitions: int) -> None: + super().__init__(num_partitions=num_partitions, cid_to_size_fnc=lambda x: x) + if num_partitions <= 0: + raise ValueError("The number of partitions must be greater than zero.") From 0c5ee2767cc8136f05dcf4e7cc96c47c6c81b377 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:41:04 +0200 Subject: [PATCH 04/17] Add square partitioner --- .../partitioner/square_partitioner.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 datasets/flwr_datasets/partitioner/square_partitioner.py diff --git a/datasets/flwr_datasets/partitioner/square_partitioner.py b/datasets/flwr_datasets/partitioner/square_partitioner.py new file mode 100644 index 000000000000..424b20702db9 --- /dev/null +++ b/datasets/flwr_datasets/partitioner/square_partitioner.py @@ -0,0 +1,37 @@ +# Copyright 2023 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""SquarePartitioner class.""" +import numpy as np + +from flwr_datasets.partitioner.size_partitioner import SizePartitioner + + +class SquarePartitioner(SizePartitioner): + """Partitioner creates partitions of size that are correlated with squared ids. + + The amount of data each client gets is correlated with the squared client's ID. 
+ For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data, + client 2 gets 4 units, and so on, up to client M which gets M^2 units. + + Parameters + ---------- + num_partitions: int + The total number of partitions that the data will be divided into. + """ + + def __init__(self, num_partitions: int) -> None: + super().__init__(num_partitions=num_partitions, cid_to_size_fnc=np.square) + if num_partitions <= 0: + raise ValueError("The number of partitions must be greater than zero.") From 3e05d5ccac7215a3bcbd379d4a54e6c11297c672 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:41:17 +0200 Subject: [PATCH 05/17] Add exponential partitioner --- .../partitioner/exponential_partitioner.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 datasets/flwr_datasets/partitioner/exponential_partitioner.py diff --git a/datasets/flwr_datasets/partitioner/exponential_partitioner.py b/datasets/flwr_datasets/partitioner/exponential_partitioner.py new file mode 100644 index 000000000000..4d278f83f7da --- /dev/null +++ b/datasets/flwr_datasets/partitioner/exponential_partitioner.py @@ -0,0 +1,37 @@ +# Copyright 2023 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""ExponentialPartitioner class.""" +import numpy as np + +from flwr_datasets.partitioner.size_partitioner import SizePartitioner + + +class ExponentialPartitioner(SizePartitioner): + """Partitioner creates partitions of size that are correlated with exp(ids). + + The amount of data each client gets is correlated with the squared client's ID. + For instance, if the IDs range from 1 to M, client with ID 1 gets e units of data, + client 2 gets e^2 units, and so on, up to client M which gets e^M units. + + Parameters + ---------- + num_partitions: int + The total number of partitions that the data will be divided into. + """ + + def __init__(self, num_partitions: int) -> None: + super().__init__(num_partitions=num_partitions, cid_to_size_fnc=np.exp) + if num_partitions <= 0: + raise ValueError("The number of partitions must be greater than zero.") From a37529ae3e95dbe4a3477af6ca134763bb41fc6b Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 16:47:22 +0200 Subject: [PATCH 06/17] Remove print --- datasets/flwr_datasets/partitioner/size_partitioner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index e471a6836c7e..2c5d51122e7e 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -106,7 +106,6 @@ def _determine_cid_to_size(self) -> None: left_unassigned_samples = len(self.dataset) - assigned_samples # If there is any sample(s) left unassigned, assign it to the largest partition. 
partition_sizes_as_num_of_samples[-1] += left_unassigned_samples - print(partition_sizes_as_num_of_samples) for idx, partition_size in enumerate(partition_sizes_as_num_of_samples): self._cid_to_size[idx] = partition_size From c393c1f0725912006669ca7ac91a1f5d8229f12a Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 23 Oct 2023 17:03:15 +0200 Subject: [PATCH 07/17] Fix tests --- datasets/flwr_datasets/partitioner/size_partitioner_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py b/datasets/flwr_datasets/partitioner/size_partitioner_test.py index c5344cc01582..fa23fe18482f 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -90,7 +90,7 @@ def test_meaningless_params(self) -> None: with self.assertRaises(ValueError) as context: partitioner.load_partition(1) self.assertIn( - "The given specification of the parameter num_partitions=10 for the given " + "The given specification of the parameter num_partitions=100 for the given " "dataset results in the partitions sizes that are not greater than 0.", str(context.exception), ) From 614c6fc304a330cddedc9df3b410f99dfe4348d9 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 24 Oct 2023 15:39:45 +0200 Subject: [PATCH 08/17] Add new partitioners to init for easy imports --- datasets/flwr_datasets/partitioner/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datasets/flwr_datasets/partitioner/__init__.py b/datasets/flwr_datasets/partitioner/__init__.py index d1eba7895702..83186a6d24ee 100644 --- a/datasets/flwr_datasets/partitioner/__init__.py +++ b/datasets/flwr_datasets/partitioner/__init__.py @@ -15,10 +15,18 @@ """Flower Datasets Partitioner package.""" +from .exponential_partitioner import ExponentialPartitioner from .iid_partitioner import IidPartitioner +from .linear_partitioner import LinearPartitioner from 
.partitioner import Partitioner +from .size_partitioner import SizePartitioner +from .square_partitioner import SquarePartitioner __all__ = [ "IidPartitioner", "Partitioner", + "SizePartitioner", + "LinearPartitioner", + "SquarePartitioner", + "ExponentialPartitioner", ] From 3038081c001295daa0b16a06fd347f10b8166c9d Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Thu, 26 Oct 2023 20:15:15 +0100 Subject: [PATCH 09/17] Add jxie/higgs as tested_datasets --- datasets/flwr_datasets/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 7badb1445460..c3409f3f0454 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -28,6 +28,7 @@ "fashion_mnist", "sasha/dog-food", "zh-plus/tiny-imagenet", + "jxie/higgs", ] From d0f2cfc78239f6fc0783be6cf7023fa9b476d4c4 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 10:22:56 +0100 Subject: [PATCH 10/17] Remove jxie/higgs from tested_datasets list --- datasets/flwr_datasets/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index c3409f3f0454..7badb1445460 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -28,7 +28,6 @@ "fashion_mnist", "sasha/dog-food", "zh-plus/tiny-imagenet", - "jxie/higgs", ] From 05bba2d4aca4dcd24ac330a31a3bc211db55d8f7 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 10:26:16 +0100 Subject: [PATCH 11/17] Clarify _check_if_cid_to_size_possible by using >= 1 instead >0 --- datasets/flwr_datasets/partitioner/size_partitioner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 2c5d51122e7e..0925f61b4a23 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ 
-125,7 +125,7 @@ def _determine_cid_to_indices_if_needed(self) -> None: self._cid_to_indices_determined = True def _check_if_cid_to_size_possible(self) -> None: - all_positive = all(value > 0 for value in self.cid_to_size.values()) + all_positive = all(value >= 1 for value in self.cid_to_size.values()) if not all_positive: raise ValueError( f"The given specification of the parameter num_partitions" From b5d456ec2713b8c1347e7f00a8594bf7d9a07a08 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 10:50:16 +0100 Subject: [PATCH 12/17] Rename cid to id (in each method) --- .../partitioner/size_partitioner.py | 18 +++++++++--------- .../partitioner/size_partitioner_test.py | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 0925f61b4a23..575dfa9d8ce4 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -76,20 +76,20 @@ def load_partition(self, idx: int) -> datasets.Dataset: """ # The partitioning is done lazily - only when the first partition is requested. # A single run creates the indices assignments for all the partition indices. 
- self._determine_cid_to_indices_if_needed() + self._determine_id_to_indices_if_needed() return self.dataset.select(self._cid_to_indices[idx]) @property - def cid_to_size(self) -> Dict[int, int]: + def id_to_size(self) -> Dict[int, int]: """Cid to the number of samples.""" return self._cid_to_size @property - def cid_to_indices(self) -> Dict[int, List[int]]: + def id_to_indices(self) -> Dict[int, List[int]]: """Cid to the list of indices.""" return self._cid_to_indices - def _determine_cid_to_size(self) -> None: + def _determine_id_to_size(self) -> None: """Determine data quantity associated with partition indices.""" data_division_in_units = self._cid_to_size_fnc( np.linspace(start=1, stop=self._num_partitions, num=self._num_partitions) @@ -109,13 +109,13 @@ def _determine_cid_to_size(self) -> None: for idx, partition_size in enumerate(partition_sizes_as_num_of_samples): self._cid_to_size[idx] = partition_size - self._check_if_cid_to_size_possible() + self._check_if_id_to_size_possible() - def _determine_cid_to_indices_if_needed(self) -> None: + def _determine_id_to_indices_if_needed(self) -> None: """Create an assignment of indices to the partition indices..""" if self._cid_to_indices_determined is True: return - self._determine_cid_to_size() + self._determine_id_to_size() total_samples_assigned = 0 for idx, quantity in self._cid_to_size.items(): self._cid_to_indices[idx] = list( @@ -124,8 +124,8 @@ def _determine_cid_to_indices_if_needed(self) -> None: total_samples_assigned += quantity self._cid_to_indices_determined = True - def _check_if_cid_to_size_possible(self) -> None: - all_positive = all(value >= 1 for value in self.cid_to_size.values()) + def _check_if_id_to_size_possible(self) -> None: + all_positive = all(value >= 1 for value in self.id_to_size.values()) if not all_positive: raise ValueError( f"The given specification of the parameter num_partitions" diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py 
b/datasets/flwr_datasets/partitioner/size_partitioner_test.py index fa23fe18482f..6bcdb59e7cd6 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -47,13 +47,13 @@ def test_linear_distribution(self, num_partitions: int, num_rows: int) -> None: partitioner.dataset = dataset # Run a single partition loading to trigger the division _ = partitioner.load_partition(0) - total_samples = sum(partitioner.cid_to_size.values()) + total_samples = sum(partitioner.id_to_size.values()) self.assertEqual(total_samples, num_rows) # Testing if each partition is getting more than the previous one last_count = 0 for i in range(num_partitions): - current_count = partitioner.cid_to_size[i] + current_count = partitioner.id_to_size[i] self.assertGreaterEqual(current_count, last_count) last_count = current_count @@ -75,7 +75,7 @@ def test_undivided_samples(self, num_partitions: int, num_rows: int) -> None: actual_samples_in_last_partition = len( partitioner.load_partition(last_partition_id) ) - expected_samples_in_last_partition = partitioner.cid_to_size[last_partition_id] + expected_samples_in_last_partition = partitioner.id_to_size[last_partition_id] self.assertEqual( expected_samples_in_last_partition, actual_samples_in_last_partition ) From 55691d91e478654415c43b3a5c7b1183672387e4 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 11:01:29 +0100 Subject: [PATCH 13/17] Clarify documentation --- .../partitioner/exponential_partitioner.py | 12 ++++++++---- .../flwr_datasets/partitioner/linear_partitioner.py | 4 ++-- .../flwr_datasets/partitioner/square_partitioner.py | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/exponential_partitioner.py b/datasets/flwr_datasets/partitioner/exponential_partitioner.py index 4d278f83f7da..7e2bd85bfdf6 100644 --- a/datasets/flwr_datasets/partitioner/exponential_partitioner.py +++ 
b/datasets/flwr_datasets/partitioner/exponential_partitioner.py @@ -19,11 +19,15 @@ class ExponentialPartitioner(SizePartitioner): - """Partitioner creates partitions of size that are correlated with exp(ids). + """Partitioner creates partitions of size that are correlated with exp(idx). - The amount of data each client gets is correlated with the squared client's ID. - For instance, if the IDs range from 1 to M, client with ID 1 gets e units of data, - client 2 gets e^2 units, and so on, up to client M which gets e^M units. + The amount of data each client gets is correlated with the exponent of partition ID. + For instance, if the IDs range from 1 to M, client with ID 1 gets e units of + data, client 2 gets e^2 units, and so on, up to client M which gets e^M units. + The floor operation is applied on each of these numbers, it means floor(2.71...) + = 2; e^2 ~ 7.39 floor(7.39) = 7. The number is rounded down = the fraction is + always cut. The remainders of theses unassigned (fraction) samples is added to the + biggest partition (the one with the biggest idx). Parameters ---------- diff --git a/datasets/flwr_datasets/partitioner/linear_partitioner.py b/datasets/flwr_datasets/partitioner/linear_partitioner.py index a2543c72ab27..a8e7e7790d61 100644 --- a/datasets/flwr_datasets/partitioner/linear_partitioner.py +++ b/datasets/flwr_datasets/partitioner/linear_partitioner.py @@ -18,9 +18,9 @@ class LinearPartitioner(SizePartitioner): - """Partitioner creates partitions of size that are linearly correlated with its id. + """Partitioner creates partitions of size that are linearly correlated with idx. - The amount of data each client gets is linearly correlated with the client's ID. + The amount of data each client gets is linearly correlated with the partition ID. For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data, client 2 gets 2 units, and so on, up to client M which gets M units. 
diff --git a/datasets/flwr_datasets/partitioner/square_partitioner.py b/datasets/flwr_datasets/partitioner/square_partitioner.py index 424b20702db9..8b683be4c589 100644 --- a/datasets/flwr_datasets/partitioner/square_partitioner.py +++ b/datasets/flwr_datasets/partitioner/square_partitioner.py @@ -19,9 +19,9 @@ class SquarePartitioner(SizePartitioner): - """Partitioner creates partitions of size that are correlated with squared ids. + """Partitioner creates partitions of size that are correlated with squared idx. - The amount of data each client gets is correlated with the squared client's ID. + The amount of data each client gets is correlated with the squared partition ID. For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data, client 2 gets 4 units, and so on, up to client M which gets M^2 units. From 32c711722b6716de4372d062664a91b1f69debf2 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 12:33:03 +0100 Subject: [PATCH 14/17] Apply review suggestion --- .../partitioner/exponential_partitioner.py | 6 ++++-- .../partitioner/linear_partitioner.py | 5 +++-- .../partitioner/size_partitioner.py | 16 +++++++++------- .../partitioner/size_partitioner_test.py | 2 ++ .../partitioner/square_partitioner.py | 6 ++++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/exponential_partitioner.py b/datasets/flwr_datasets/partitioner/exponential_partitioner.py index 7e2bd85bfdf6..48ac59dbe42a 100644 --- a/datasets/flwr_datasets/partitioner/exponential_partitioner.py +++ b/datasets/flwr_datasets/partitioner/exponential_partitioner.py @@ -13,6 +13,8 @@ # limitations under the License. 
# ============================================================================== """ExponentialPartitioner class.""" + + import numpy as np from flwr_datasets.partitioner.size_partitioner import SizePartitioner @@ -31,11 +33,11 @@ class ExponentialPartitioner(SizePartitioner): Parameters ---------- - num_partitions: int + num_partitions : int The total number of partitions that the data will be divided into. """ def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fnc=np.exp) + super().__init__(num_partitions=num_partitions, cid_to_size_fn=np.exp) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") diff --git a/datasets/flwr_datasets/partitioner/linear_partitioner.py b/datasets/flwr_datasets/partitioner/linear_partitioner.py index a8e7e7790d61..9af8cfb137ca 100644 --- a/datasets/flwr_datasets/partitioner/linear_partitioner.py +++ b/datasets/flwr_datasets/partitioner/linear_partitioner.py @@ -14,6 +14,7 @@ # ============================================================================== """LinearPartitioner class.""" + from flwr_datasets.partitioner.size_partitioner import SizePartitioner @@ -26,11 +27,11 @@ class LinearPartitioner(SizePartitioner): Parameters ---------- - num_partitions: int + num_partitions : int The total number of partitions that the data will be divided into. 
""" def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fnc=lambda x: x) + super().__init__(num_partitions=num_partitions, cid_to_size_fn=lambda x: x) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 575dfa9d8ce4..46eea2907c9b 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""DataQuantityPartitioner class.""" +"""SizePartitioner class.""" + + from typing import Callable, Dict, List, Union import numpy as np @@ -37,22 +39,22 @@ class SizePartitioner(Partitioner): Parameters ---------- - num_partitions: int + num_partitions : int The total number of partitions that the data will be divided into. - cid_to_size_fnc: Callable + cid_to_size_fn : Callable Function that defines the relationship between cid and the number of samples. 
""" def __init__( self, num_partitions: int, - cid_to_size_fnc: Callable, # type: ignore[type-arg] + cid_to_size_fn: Callable, # type: ignore[type-arg] ) -> None: super().__init__() if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") self._num_partitions = num_partitions - self._cid_to_size_fnc = cid_to_size_fnc + self._cid_to_size_fn = cid_to_size_fn self._cid_to_size: Dict[int, int] = {} self._cid_to_indices: Dict[int, List[int]] = {} @@ -66,7 +68,7 @@ def load_partition(self, idx: int) -> datasets.Dataset: Parameters ---------- - idx: int + idx : int the index that corresponds to the requested partition Returns @@ -91,7 +93,7 @@ def id_to_indices(self) -> Dict[int, List[int]]: def _determine_id_to_size(self) -> None: """Determine data quantity associated with partition indices.""" - data_division_in_units = self._cid_to_size_fnc( + data_division_in_units = self._cid_to_size_fn( np.linspace(start=1, stop=self._num_partitions, num=self._num_partitions) ) total_units: Union[int, float] = data_division_in_units.sum() diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py b/datasets/flwr_datasets/partitioner/size_partitioner_test.py index 6bcdb59e7cd6..dd7d70253d9a 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -13,6 +13,8 @@ # limitations under the License. # ============================================================================== """SizePartitioner tests.""" + + import unittest from parameterized import parameterized diff --git a/datasets/flwr_datasets/partitioner/square_partitioner.py b/datasets/flwr_datasets/partitioner/square_partitioner.py index 8b683be4c589..1619f2826311 100644 --- a/datasets/flwr_datasets/partitioner/square_partitioner.py +++ b/datasets/flwr_datasets/partitioner/square_partitioner.py @@ -13,6 +13,8 @@ # limitations under the License. 
# ============================================================================== """SquarePartitioner class.""" + + import numpy as np from flwr_datasets.partitioner.size_partitioner import SizePartitioner @@ -27,11 +29,11 @@ class SquarePartitioner(SizePartitioner): Parameters ---------- - num_partitions: int + num_partitions : int The total number of partitions that the data will be divided into. """ def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fnc=np.square) + super().__init__(num_partitions=num_partitions, cid_to_size_fn=np.square) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") From e048cc6f2230cffe7fbcc1752b6da3a7882bff85 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 12:42:55 +0100 Subject: [PATCH 15/17] Rename cid to node_id --- .../partitioner/exponential_partitioner.py | 2 +- .../partitioner/linear_partitioner.py | 2 +- .../partitioner/size_partitioner.py | 75 ++++++++++--------- .../partitioner/size_partitioner_test.py | 8 +- .../partitioner/square_partitioner.py | 2 +- 5 files changed, 47 insertions(+), 42 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/exponential_partitioner.py b/datasets/flwr_datasets/partitioner/exponential_partitioner.py index 48ac59dbe42a..07bb07cea9a5 100644 --- a/datasets/flwr_datasets/partitioner/exponential_partitioner.py +++ b/datasets/flwr_datasets/partitioner/exponential_partitioner.py @@ -38,6 +38,6 @@ class ExponentialPartitioner(SizePartitioner): """ def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fn=np.exp) + super().__init__(num_partitions=num_partitions, node_id_to_size_fn=np.exp) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") diff --git a/datasets/flwr_datasets/partitioner/linear_partitioner.py b/datasets/flwr_datasets/partitioner/linear_partitioner.py index 
9af8cfb137ca..a9dc8b020c08 100644 --- a/datasets/flwr_datasets/partitioner/linear_partitioner.py +++ b/datasets/flwr_datasets/partitioner/linear_partitioner.py @@ -32,6 +32,6 @@ class LinearPartitioner(SizePartitioner): """ def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fn=lambda x: x) + super().__init__(num_partitions=num_partitions, node_id_to_size_fn=lambda x: x) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 46eea2907c9b..dda31b4aa016 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -24,42 +24,45 @@ class SizePartitioner(Partitioner): - """Base class for the deterministic size partitioning schemes based on the cid. + """Base class for the deterministic size partitioning schemes based on the node id. - The client with cid has the following relationship regarding the number of samples. - cid_to_size_fnc(cid) ~ number of samples for cid + The client with node is has the following relationship regarding the number of + samples. - If the function doesn't transform the cid it's a linear correlations of the number - of sample for cid and the value of cid. For instance, if the IDs range from 1 to M, - client with ID 1 gets 1 unit of data, client 2 gets 2 units, and so on, up to client - M which gets M units. + node_id_to_size_fn(node_id) ~ number of samples for node_id - Note that size corresponding to the cid is deterministic, yet in case of different - dataset shuffling the assignment of samples to cid will vary. + If the function doesn't transform the node_id it's a linear correlations between + the number of sample for the node and the value of node_id. 
For instance, if the + node ids range from 1 to M, node with id 1 gets 1 unit of data, client 2 gets 2 + units, and so on, up to node M which gets M units. + + Note that size corresponding to the node_id is deterministic, yet in case of + different dataset shuffling the assignment of samples to node_id will vary. Parameters ---------- num_partitions : int The total number of partitions that the data will be divided into. - cid_to_size_fn : Callable - Function that defines the relationship between cid and the number of samples. + node_id_to_size_fn : Callable + Function that defines the relationship between node id and the number of + samples. """ def __init__( self, num_partitions: int, - cid_to_size_fn: Callable, # type: ignore[type-arg] + node_id_to_size_fn: Callable, # type: ignore[type-arg] ) -> None: super().__init__() if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") self._num_partitions = num_partitions - self._cid_to_size_fn = cid_to_size_fn + self._node_id_to_size_fn = node_id_to_size_fn - self._cid_to_size: Dict[int, int] = {} - self._cid_to_indices: Dict[int, List[int]] = {} + self._node_id_to_size: Dict[int, int] = {} + self._node_id_to_indices: Dict[int, List[int]] = {} # A flag to perform only a single compute to determine the indices - self._cid_to_indices_determined = False + self._node_id_to_indices_determined = False def load_partition(self, idx: int) -> datasets.Dataset: """Load a single partition based on the partition index. @@ -78,22 +81,22 @@ def load_partition(self, idx: int) -> datasets.Dataset: """ # The partitioning is done lazily - only when the first partition is requested. # A single run creates the indices assignments for all the partition indices. 
- self._determine_id_to_indices_if_needed() - return self.dataset.select(self._cid_to_indices[idx]) + self._determine_node_id_to_indices_if_needed() + return self.dataset.select(self._node_id_to_indices[idx]) @property - def id_to_size(self) -> Dict[int, int]: - """Cid to the number of samples.""" - return self._cid_to_size + def node_id_to_size(self) -> Dict[int, int]: + """Node id to the number of samples.""" + return self._node_id_to_size @property - def id_to_indices(self) -> Dict[int, List[int]]: - """Cid to the list of indices.""" - return self._cid_to_indices + def node_id_to_indices(self) -> Dict[int, List[int]]: + """Node id to the list of indices.""" + return self._node_id_to_indices - def _determine_id_to_size(self) -> None: + def _determine_node_id_to_size(self) -> None: """Determine data quantity associated with partition indices.""" - data_division_in_units = self._cid_to_size_fn( + data_division_in_units = self._node_id_to_size_fn( np.linspace(start=1, stop=self._num_partitions, num=self._num_partitions) ) total_units: Union[int, float] = data_division_in_units.sum() @@ -109,25 +112,25 @@ def _determine_id_to_size(self) -> None: # If there is any sample(s) left unassigned, assign it to the largest partition. 
partition_sizes_as_num_of_samples[-1] += left_unassigned_samples for idx, partition_size in enumerate(partition_sizes_as_num_of_samples): - self._cid_to_size[idx] = partition_size + self._node_id_to_size[idx] = partition_size - self._check_if_id_to_size_possible() + self._check_if_node_id_to_size_possible() - def _determine_id_to_indices_if_needed(self) -> None: + def _determine_node_id_to_indices_if_needed(self) -> None: """Create an assignment of indices to the partition indices..""" - if self._cid_to_indices_determined is True: + if self._node_id_to_indices_determined is True: return - self._determine_id_to_size() + self._determine_node_id_to_size() total_samples_assigned = 0 - for idx, quantity in self._cid_to_size.items(): - self._cid_to_indices[idx] = list( + for idx, quantity in self._node_id_to_size.items(): + self._node_id_to_indices[idx] = list( range(total_samples_assigned, total_samples_assigned + quantity) ) total_samples_assigned += quantity - self._cid_to_indices_determined = True + self._node_id_to_indices_determined = True - def _check_if_id_to_size_possible(self) -> None: - all_positive = all(value >= 1 for value in self.id_to_size.values()) + def _check_if_node_id_to_size_possible(self) -> None: + all_positive = all(value >= 1 for value in self.node_id_to_size.values()) if not all_positive: raise ValueError( f"The given specification of the parameter num_partitions" diff --git a/datasets/flwr_datasets/partitioner/size_partitioner_test.py b/datasets/flwr_datasets/partitioner/size_partitioner_test.py index dd7d70253d9a..390f6a613fce 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner_test.py @@ -49,13 +49,13 @@ def test_linear_distribution(self, num_partitions: int, num_rows: int) -> None: partitioner.dataset = dataset # Run a single partition loading to trigger the division _ = partitioner.load_partition(0) - total_samples = sum(partitioner.id_to_size.values()) + 
total_samples = sum(partitioner.node_id_to_size.values()) self.assertEqual(total_samples, num_rows) # Testing if each partition is getting more than the previous one last_count = 0 for i in range(num_partitions): - current_count = partitioner.id_to_size[i] + current_count = partitioner.node_id_to_size[i] self.assertGreaterEqual(current_count, last_count) last_count = current_count @@ -77,7 +77,9 @@ def test_undivided_samples(self, num_partitions: int, num_rows: int) -> None: actual_samples_in_last_partition = len( partitioner.load_partition(last_partition_id) ) - expected_samples_in_last_partition = partitioner.id_to_size[last_partition_id] + expected_samples_in_last_partition = partitioner.node_id_to_size[ + last_partition_id + ] self.assertEqual( expected_samples_in_last_partition, actual_samples_in_last_partition ) diff --git a/datasets/flwr_datasets/partitioner/square_partitioner.py b/datasets/flwr_datasets/partitioner/square_partitioner.py index 1619f2826311..25affaf304ef 100644 --- a/datasets/flwr_datasets/partitioner/square_partitioner.py +++ b/datasets/flwr_datasets/partitioner/square_partitioner.py @@ -34,6 +34,6 @@ class SquarePartitioner(SizePartitioner): """ def __init__(self, num_partitions: int) -> None: - super().__init__(num_partitions=num_partitions, cid_to_size_fn=np.square) + super().__init__(num_partitions=num_partitions, node_id_to_size_fn=np.square) if num_partitions <= 0: raise ValueError("The number of partitions must be greater than zero.") From 620924f0f85614a121480c5bb4194ec8a689782f Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Tue, 7 Nov 2023 13:09:42 +0100 Subject: [PATCH 16/17] Apply suggestions from code review Co-authored-by: Daniel J. 
Beutel --- .../flwr_datasets/partitioner/size_partitioner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index dda31b4aa016..7fbe4fd2f70d 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -24,20 +24,20 @@ class SizePartitioner(Partitioner): - """Base class for the deterministic size partitioning schemes based on the node id. + """Base class for the deterministic size partitioning schemes based on the `node_id`. - The client with node is has the following relationship regarding the number of + The client with `node_id` has the following relationship regarding the number of samples. - node_id_to_size_fn(node_id) ~ number of samples for node_id + `node_id_to_size_fn(node_id)` ~ number of samples for `node_id` - If the function doesn't transform the node_id it's a linear correlations between - the number of sample for the node and the value of node_id. For instance, if the + If the function doesn't transform the `node_id` it's a linear correlation between + the number of samples for the node and the value of `node_id`. For instance, if the node ids range from 1 to M, node with id 1 gets 1 unit of data, client 2 gets 2 units, and so on, up to node M which gets M units. - Note that size corresponding to the node_id is deterministic, yet in case of - different dataset shuffling the assignment of samples to node_id will vary. + Note that size corresponding to the `node_id` is deterministic, yet in case of + different dataset shuffling the assignment of samples to `node_id` will vary. 
Parameters ---------- From def2d437394be7dead26adf82076832e493cc35e Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 7 Nov 2023 13:12:00 +0100 Subject: [PATCH 17/17] Change SizePartitioner doc to make line width requirement --- datasets/flwr_datasets/partitioner/size_partitioner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 7fbe4fd2f70d..27e1f528dc52 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -24,7 +24,7 @@ class SizePartitioner(Partitioner): - """Base class for the deterministic size partitioning schemes based on the `node_id`. + """Base class for the deterministic size partitioning based on the `node_id`. The client with `node_id` has the following relationship regarding the number of samples.