Skip to content

Commit

Permalink
Move barrier calls in ParallelContext
Browse files (browse the repository at this point in the history)
  • Loading branch information
NouamaneTazi committed Feb 14, 2024
1 parent 5d3eb67 commit 226c1c6
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/nanotron/parallel/context.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,8 +62,6 @@ def __init__(

def _init_parallel_groups(self):
"""Initialize 3D parallelism's all process groups."""
# NOTE: ensure all processes have joined the global group
# before creating other groups
dist.barrier()
world_size = int(os.environ["WORLD_SIZE"])
ranks = np.arange(0, world_size).reshape(
Expand Down Expand Up @@ -92,9 +90,9 @@ def _init_parallel_groups(self):
)

self.world_rank_matrix: np.ndarray = ranks
dist.barrier()

def create_new_group(self, all_groups_ranks: np.ndarray) -> dist.ProcessGroup:
dist.barrier()
rank = int(os.environ["RANK"])
new_group_containing_rank = None
for group_ranks in all_groups_ranks:
Expand All @@ -109,6 +107,7 @@ def create_new_group(self, all_groups_ranks: np.ndarray) -> dist.ProcessGroup:

if rank in sorted_ranks:
new_group_containing_rank = new_group
dist.barrier()
return new_group_containing_rank

def set_device(self):
Expand Down

0 comments on commit 226c1c6

Please sign in to comment.