Rename idx to node_id (#2590)
adam-narozniak authored Nov 15, 2023
1 parent 3c89754 commit 93b8a3d
Showing 10 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion datasets/README.md
@@ -53,7 +53,7 @@ If you plan to change the type of the dataset to run the code with your ML frame

# Usage

- The Flower Datasets exposes `FederatedDataset(dataset, partitioners)` abstraction to represent the dataset needed for federated learning/analytics. It has two powerful methods that let you handle the dataset preprocessing. They are `load_partition(idx, split)` and `load_full(split)`.
+ The Flower Datasets exposes `FederatedDataset(dataset, partitioners)` abstraction to represent the dataset needed for federated learning/analytics. It has two powerful methods that let you handle the dataset preprocessing. They are `load_partition(node_id, split)` and `load_full(split)`.

Here's a quick example of how to partition the MNIST dataset:
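For orientation, a minimal sketch of such usage with the renamed parameter, assuming the `mnist` dataset and 100 IID partitions of the train split (not necessarily the README's exact snippet):

```python
from flwr_datasets import FederatedDataset

# 100 IID partitions of the train split; the test split stays unpartitioned.
fds = FederatedDataset(dataset="mnist", partitioners={"train": 100})
partition = fds.load_partition(node_id=0, split="train")
centralized_dataset = fds.load_full(split="test")
```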

6 changes: 3 additions & 3 deletions datasets/flwr_datasets/federated_dataset.py
@@ -101,15 +101,15 @@ def __init__(
# Indicate if the dataset is prepared for `load_partition` or `load_full`
self._dataset_prepared: bool = False

- def load_partition(self, idx: int, split: Optional[str] = None) -> Dataset:
+ def load_partition(self, node_id: int, split: Optional[str] = None) -> Dataset:
"""Load the partition specified by the idx in the selected split.
The dataset is downloaded only when the first call to `load_partition` or
`load_full` is made.
Parameters
----------
- idx : int
+ node_id : int
Partition index for the selected split, idx in {0, ..., num_partitions - 1}.
split : Optional[str]
Name of the (partitioned) split (e.g. "train", "test"). You can skip this
@@ -135,7 +135,7 @@ def load_partition(self, idx: int, split: Optional[str] = None) -> Dataset:
self._check_if_split_possible_to_federate(split)
partitioner: Partitioner = self._partitioners[split]
self._assign_dataset_to_partitioner(split)
- return partitioner.load_partition(idx)
+ return partitioner.load_partition(node_id)

def load_full(self, split: str) -> Dataset:
"""Load the full split of the dataset.
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/partitioner/exponential_partitioner.py
@@ -21,15 +21,15 @@


class ExponentialPartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are correlated with exp(idx).
"""Partitioner creates partitions of size that are correlated with exp(node_id).
The amount of data each client gets is correlated with the exponent of partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets e units of
data, client 2 gets e^2 units, and so on, up to client M which gets e^M units.
The floor operation is applied to each of these numbers; it means floor(2.71...)
= 2, and e^2 ~ 7.39 gives floor(7.39) = 7. The number is always rounded down, so the
fraction is cut. The remainders of these unassigned (fractional) samples are added to the
- biggest partition (the one with the biggest idx).
+ biggest partition (the one with the biggest node_id).
Parameters
----------
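To make the arithmetic above concrete, here is a hypothetical helper (not the partitioner's actual code) reproducing the described rule: sizes proportional to e^1 ... e^M, floored, with the leftover samples handed to the biggest partition:

```python
import math

def exponential_sizes(num_partitions: int, total_samples: int) -> list[int]:
    # Sizes proportional to e^1, e^2, ..., e^num_partitions.
    weights = [math.exp(i + 1) for i in range(num_partitions)]
    scale = total_samples / sum(weights)
    # Floor each size: the fraction is always cut.
    sizes = [math.floor(w * scale) for w in weights]
    # The unassigned remainder goes to the biggest partition.
    sizes[-1] += total_samples - sum(sizes)
    return sizes

print(exponential_sizes(3, 100))  # [9, 24, 67]
```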
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/partitioner/iid_partitioner.py
@@ -34,12 +34,12 @@ def __init__(self, num_partitions: int) -> None:
raise ValueError("The number of partitions must be greater than zero.")
self._num_partitions = num_partitions

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single IID partition based on the partition index.
Parameters
----------
- idx: int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -48,5 +48,5 @@ def load_partition(self, idx: int) -> datasets.Dataset:
single dataset partition
"""
return self.dataset.shard(
- num_shards=self._num_partitions, index=idx, contiguous=True
+ num_shards=self._num_partitions, index=node_id, contiguous=True
)
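A quick usage sketch of the renamed API (the toy dataset is made up for illustration; the `dataset` setter is the property shown in partitioner.py below):

```python
from datasets import Dataset
from flwr_datasets.partitioner import IidPartitioner

dataset = Dataset.from_dict({"x": list(range(10))})  # toy 10-row dataset
partitioner = IidPartitioner(num_partitions=5)
partitioner.dataset = dataset  # assign via the property setter
partition = partitioner.load_partition(node_id=0)
print(partition["x"])  # first contiguous shard: [0, 1]
```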
2 changes: 1 addition & 1 deletion datasets/flwr_datasets/partitioner/linear_partitioner.py
@@ -19,7 +19,7 @@


class LinearPartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are linearly correlated with idx.
"""Partitioner creates partitions of size that are linearly correlated with node_id.
The amount of data each client gets is linearly correlated with the partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data,
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/partitioner/natural_id_partitioner.py
@@ -42,15 +42,15 @@ def _create_int_node_id_to_natural_id(self) -> None:
zip(range(len(unique_natural_ids)), unique_natural_ids)
)

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single partition corresponding to a single `node_id`.
The choice of the partition is based on unique integers assigned to each
natural id present in the dataset in the `partition_by` column.
Parameters
----------
- idx: int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -62,7 +62,7 @@ def load_partition(self, idx: int) -> datasets.Dataset:
self._create_int_node_id_to_natural_id()

return self.dataset.filter(
- lambda row: row[self._partition_by] == self._node_id_to_natural_id[idx]
+ lambda row: row[self._partition_by] == self._node_id_to_natural_id[node_id]
)

@property
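A hypothetical usage sketch of the same rename for this partitioner; here node_id 0 happens to map to the first unique value found in the partition_by column:

```python
from datasets import Dataset
from flwr_datasets.partitioner import NaturalIdPartitioner

# Illustrative dataset where "writer_id" plays the role of the natural id.
dataset = Dataset.from_dict(
    {"x": [10, 11, 12, 13], "writer_id": ["a", "a", "b", "b"]}
)
partitioner = NaturalIdPartitioner(partition_by="writer_id")
partitioner.dataset = dataset
partition = partitioner.load_partition(node_id=0)  # rows where writer_id == "a"
```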
@@ -105,7 +105,7 @@ def test_correct_number_of_partitions(
) -> None:
"""Test if the # of available partitions is equal to # of unique clients."""
_, partitioner = _dummy_setup(num_rows, num_unique_natural_ids)
- _ = partitioner.load_partition(idx=0)
+ _ = partitioner.load_partition(node_id=0)
self.assertEqual(len(partitioner.node_id_to_natural_id), num_unique_natural_ids)

def test_cannot_set_node_id_to_natural_id(self) -> None:
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/partitioner/partitioner.py
@@ -53,12 +53,12 @@ def dataset(self, value: Dataset) -> None:
self._dataset = value

@abstractmethod
- def load_partition(self, idx: int) -> Dataset:
+ def load_partition(self, node_id: int) -> Dataset:
"""Load a single partition based on the partition index.
Parameters
----------
- idx: int
+ node_id: int
the index that corresponds to the requested partition
Returns
8 changes: 4 additions & 4 deletions datasets/flwr_datasets/partitioner/size_partitioner.py
@@ -64,14 +64,14 @@ def __init__(
# A flag to perform only a single compute to determine the indices
self._node_id_to_indices_determined = False

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single partition based on the partition index.
- For this partitioner the number of samples is dependent on the partition idx.
+ The number of samples is dependent on the partition node_id.
Parameters
----------
- idx : int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -82,7 +82,7 @@ def load_partition(self, idx: int) -> datasets.Dataset:
# The partitioning is done lazily - only when the first partition is requested.
# A single run creates the indices assignments for all the partition indices.
self._determine_node_id_to_indices_if_needed()
- return self.dataset.select(self._node_id_to_indices[idx])
+ return self.dataset.select(self._node_id_to_indices[node_id])

@property
def node_id_to_size(self) -> Dict[int, int]:
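The comments above describe a lazy, compute-once assignment: nothing is partitioned until the first `load_partition` call, and that single call fixes the indices for every node_id. A self-contained sketch of that pattern (simplified to equal sizes; not the library's code):

```python
from typing import Dict, List

class LazyPartitionerSketch:
    def __init__(self, num_partitions: int, num_samples: int) -> None:
        self._num_partitions = num_partitions
        self._num_samples = num_samples
        self._node_id_to_indices: Dict[int, List[int]] = {}
        self._determined = False  # flag: a single compute serves all node_ids

    def _determine_indices_if_needed(self) -> None:
        if self._determined:
            return  # already computed by an earlier call
        # One pass assigns the indices for every node_id.
        size = self._num_samples // self._num_partitions
        for node_id in range(self._num_partitions):
            start = node_id * size
            self._node_id_to_indices[node_id] = list(range(start, start + size))
        self._determined = True

    def load_partition(self, node_id: int) -> List[int]:
        self._determine_indices_if_needed()  # first call triggers the compute
        return self._node_id_to_indices[node_id]
```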
2 changes: 1 addition & 1 deletion datasets/flwr_datasets/partitioner/square_partitioner.py
@@ -21,7 +21,7 @@


class SquarePartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are correlated with squared idx.
"""Partitioner creates partitions of size that are correlated with squared node_id.
The amount of data each client gets is correlated with the squared partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data,
