Rename idx to node_id (#2590)
adam-narozniak authored Nov 15, 2023
1 parent 3c89754 commit 93b8a3d
Showing 10 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion datasets/README.md
@@ -53,7 +53,7 @@ If you plan to change the type of the dataset to run the code with your ML frame

# Usage

- The Flower Datasets exposes `FederatedDataset(dataset, partitioners)` abstraction to represent the dataset needed for federated learning/analytics. It has two powerful methods that let you handle the dataset preprocessing. They are `load_partition(idx, split)` and `load_full(split)`.
+ The Flower Datasets exposes `FederatedDataset(dataset, partitioners)` abstraction to represent the dataset needed for federated learning/analytics. It has two powerful methods that let you handle the dataset preprocessing. They are `load_partition(node_id, split)` and `load_full(split)`.

Here's a quick example of how to partition the MNIST dataset:
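For orientation, a minimal sketch of such usage with the renamed parameter, assuming the `mnist` dataset and 100 IID partitions of the train split (not necessarily the README's exact snippet):

```python
from flwr_datasets import FederatedDataset

# 100 IID partitions of the train split; the test split stays unpartitioned.
fds = FederatedDataset(dataset="mnist", partitioners={"train": 100})
partition = fds.load_partition(node_id=0, split="train")
centralized_dataset = fds.load_full(split="test")
```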

6 changes: 3 additions & 3 deletions datasets/flwr_datasets/federated_dataset.py
@@ -101,15 +101,15 @@ def __init__(
# Indicate if the dataset is prepared for `load_partition` or `load_full`
self._dataset_prepared: bool = False

- def load_partition(self, idx: int, split: Optional[str] = None) -> Dataset:
+ def load_partition(self, node_id: int, split: Optional[str] = None) -> Dataset:
"""Load the partition specified by the idx in the selected split.
The dataset is downloaded only when the first call to `load_partition` or
`load_full` is made.
Parameters
----------
- idx : int
+ node_id : int
Partition index for the selected split, idx in {0, ..., num_partitions - 1}.
split : Optional[str]
Name of the (partitioned) split (e.g. "train", "test"). You can skip this
@@ -135,7 +135,7 @@ def load_partition(self, idx: int, split: Optional[str] = None) -> Dataset:
self._check_if_split_possible_to_federate(split)
partitioner: Partitioner = self._partitioners[split]
self._assign_dataset_to_partitioner(split)
- return partitioner.load_partition(idx)
+ return partitioner.load_partition(node_id)

def load_full(self, split: str) -> Dataset:
"""Load the full split of the dataset.
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/partitioner/exponential_partitioner.py
@@ -21,15 +21,15 @@


class ExponentialPartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are correlated with exp(idx).
"""Partitioner creates partitions of size that are correlated with exp(node_id).
The amount of data each client gets is correlated with the exponent of partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets e units of
data, client 2 gets e^2 units, and so on, up to client M which gets e^M units.
The floor operation is applied to each of these numbers; it means floor(2.71...)
= 2, and e^2 ~ 7.39 gives floor(7.39) = 7. The number is always rounded down, so the
fraction is cut. The remainders of these unassigned (fractional) samples are added to the
- biggest partition (the one with the biggest idx).
+ biggest partition (the one with the biggest node_id).
Parameters
----------
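To make the arithmetic above concrete, here is a hypothetical helper (not the partitioner's actual code) reproducing the described rule: sizes proportional to e^1 ... e^M, floored, with the leftover samples handed to the biggest partition:

```python
import math

def exponential_sizes(num_partitions: int, total_samples: int) -> list[int]:
    # Sizes proportional to e^1, e^2, ..., e^num_partitions.
    weights = [math.exp(i + 1) for i in range(num_partitions)]
    scale = total_samples / sum(weights)
    # Floor each size: the fraction is always cut.
    sizes = [math.floor(w * scale) for w in weights]
    # The unassigned remainder goes to the biggest partition.
    sizes[-1] += total_samples - sum(sizes)
    return sizes

print(exponential_sizes(3, 100))  # [9, 24, 67]
```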
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/partitioner/iid_partitioner.py
@@ -34,12 +34,12 @@ def __init__(self, num_partitions: int) -> None:
raise ValueError("The number of partitions must be greater than zero.")
self._num_partitions = num_partitions

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single IID partition based on the partition index.
Parameters
----------
- idx: int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -48,5 +48,5 @@ def load_partition(self, idx: int) -> datasets.Dataset:
single dataset partition
"""
return self.dataset.shard(
- num_shards=self._num_partitions, index=idx, contiguous=True
+ num_shards=self._num_partitions, index=node_id, contiguous=True
)
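A quick usage sketch of the renamed API (the toy dataset is made up for illustration; the `dataset` setter is the property shown in partitioner.py below):

```python
from datasets import Dataset
from flwr_datasets.partitioner import IidPartitioner

dataset = Dataset.from_dict({"x": list(range(10))})  # toy 10-row dataset
partitioner = IidPartitioner(num_partitions=5)
partitioner.dataset = dataset  # assign via the property setter
partition = partitioner.load_partition(node_id=0)
print(partition["x"])  # first contiguous shard: [0, 1]
```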
2 changes: 1 addition & 1 deletion datasets/flwr_datasets/partitioner/linear_partitioner.py
@@ -19,7 +19,7 @@


class LinearPartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are linearly correlated with idx.
"""Partitioner creates partitions of size that are linearly correlated with node_id.
The amount of data each client gets is linearly correlated with the partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data,
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/partitioner/natural_id_partitioner.py
@@ -42,15 +42,15 @@ def _create_int_node_id_to_natural_id(self) -> None:
zip(range(len(unique_natural_ids)), unique_natural_ids)
)

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single partition corresponding to a single `node_id`.
The choice of the partition is based on unique integers assigned to each
natural id present in the dataset in the `partition_by` column.
Parameters
----------
- idx: int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -62,7 +62,7 @@ def load_partition(self, idx: int) -> datasets.Dataset:
self._create_int_node_id_to_natural_id()

return self.dataset.filter(
- lambda row: row[self._partition_by] == self._node_id_to_natural_id[idx]
+ lambda row: row[self._partition_by] == self._node_id_to_natural_id[node_id]
)

@property
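A hypothetical usage sketch of the same rename for this partitioner; here node_id 0 happens to map to the first unique value found in the partition_by column:

```python
from datasets import Dataset
from flwr_datasets.partitioner import NaturalIdPartitioner

# Illustrative dataset where "writer_id" plays the role of the natural id.
dataset = Dataset.from_dict(
    {"x": [10, 11, 12, 13], "writer_id": ["a", "a", "b", "b"]}
)
partitioner = NaturalIdPartitioner(partition_by="writer_id")
partitioner.dataset = dataset
partition = partitioner.load_partition(node_id=0)  # rows where writer_id == "a"
```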
@@ -105,7 +105,7 @@ def test_correct_number_of_partitions(
) -> None:
"""Test if the # of available partitions is equal to # of unique clients."""
_, partitioner = _dummy_setup(num_rows, num_unique_natural_ids)
- _ = partitioner.load_partition(idx=0)
+ _ = partitioner.load_partition(node_id=0)
self.assertEqual(len(partitioner.node_id_to_natural_id), num_unique_natural_ids)

def test_cannot_set_node_id_to_natural_id(self) -> None:
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/partitioner/partitioner.py
@@ -53,12 +53,12 @@ def dataset(self, value: Dataset) -> None:
self._dataset = value

@abstractmethod
- def load_partition(self, idx: int) -> Dataset:
+ def load_partition(self, node_id: int) -> Dataset:
"""Load a single partition based on the partition index.
Parameters
----------
- idx: int
+ node_id: int
the index that corresponds to the requested partition
Returns
8 changes: 4 additions & 4 deletions datasets/flwr_datasets/partitioner/size_partitioner.py
@@ -64,14 +64,14 @@ def __init__(
# A flag to perform only a single compute to determine the indices
self._node_id_to_indices_determined = False

- def load_partition(self, idx: int) -> datasets.Dataset:
+ def load_partition(self, node_id: int) -> datasets.Dataset:
"""Load a single partition based on the partition index.
- For this partitioner the number of samples is dependent on the partition idx.
+ The number of samples is dependent on the partition node_id.
Parameters
----------
- idx : int
+ node_id : int
the index that corresponds to the requested partition
Returns
@@ -82,7 +82,7 @@ def load_partition(self, idx: int) -> datasets.Dataset:
# The partitioning is done lazily - only when the first partition is requested.
# A single run creates the indices assignments for all the partition indices.
self._determine_node_id_to_indices_if_needed()
- return self.dataset.select(self._node_id_to_indices[idx])
+ return self.dataset.select(self._node_id_to_indices[node_id])

@property
def node_id_to_size(self) -> Dict[int, int]:
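The comments above describe a lazy, compute-once assignment: nothing is partitioned until the first `load_partition` call, and that single call fixes the indices for every node_id. A self-contained sketch of that pattern (simplified to equal sizes; not the library's code):

```python
from typing import Dict, List

class LazyPartitionerSketch:
    def __init__(self, num_partitions: int, num_samples: int) -> None:
        self._num_partitions = num_partitions
        self._num_samples = num_samples
        self._node_id_to_indices: Dict[int, List[int]] = {}
        self._determined = False  # flag: a single compute serves all node_ids

    def _determine_indices_if_needed(self) -> None:
        if self._determined:
            return  # already computed by an earlier call
        # One pass assigns the indices for every node_id.
        size = self._num_samples // self._num_partitions
        for node_id in range(self._num_partitions):
            start = node_id * size
            self._node_id_to_indices[node_id] = list(range(start, start + size))
        self._determined = True

    def load_partition(self, node_id: int) -> List[int]:
        self._determine_indices_if_needed()  # first call triggers the compute
        return self._node_id_to_indices[node_id]
```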
2 changes: 1 addition & 1 deletion datasets/flwr_datasets/partitioner/square_partitioner.py
@@ -21,7 +21,7 @@


class SquarePartitioner(SizePartitioner):
"""Partitioner creates partitions of size that are correlated with squared idx.
"""Partitioner creates partitions of size that are correlated with squared node_id.
The amount of data each client gets is correlated with the squared partition ID.
For instance, if the IDs range from 1 to M, client with ID 1 gets 1 unit of data,
