Commit 3ba5b2f (committed by pytorchbot)
1 parent: b544669
1 file changed: 10 additions, 2 deletions
@@ -239,7 +239,11 @@ def intra_and_cross_node_pg_2D(
                     group * devices_per_node : (group + 1) * devices_per_node
                 ]
                 intra_pg_groups[group_rank].append(intra_pg_peers)
-                curr_intra_pg = dist.new_group(backend=backend, ranks=intra_pg_peers)
+                curr_intra_pg = dist.new_group(
+                    backend=backend,
+                    ranks=intra_pg_peers,
+                    group_desc="sharding_intra_pg",
+                )
                 if my_rank in intra_pg_peers:
                     logger.warning(
                         f"[Connection] 2D rank {my_rank} -> intra_pg_peers {intra_pg_peers}"
@@ -256,7 +260,11 @@ def intra_and_cross_node_pg_2D(
                     intra_pg_group[j][cross_group_rank]
                     for j in range(len(intra_pg_group))
                 ]
-                curr_cross_pg = dist.new_group(backend=backend, ranks=cross_pg_peers)
+                curr_cross_pg = dist.new_group(
+                    backend=backend,
+                    ranks=cross_pg_peers,
+                    group_desc="sharding_cross_pg",
+                )
                 if my_rank in cross_pg_peers:
                     logger.warning(
                         f"[Connection] 2D rank {my_rank} -> cross_pg_peers {cross_pg_peers}"
You can’t perform that action at this time.
0 commit comments