Skip to content

Commit

Permalink
sharp: Add trace option at rank 0 for sharp colls
Browse files Browse the repository at this point in the history
  • Loading branch information
bureddy committed May 30, 2024
1 parent 3fb6c8d commit 7e482ec
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/sharp_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ NCCL_PARAM(SharpGroupSizeThresh, "SHARP_GROUP_SIZE_THRESH", 2);
NCCL_PARAM(SharpV3Datatypes, "SHARP_V3_DATATYPES", 2);
NCCL_PARAM(SharpDisableRS, "SHARP_DISABLE_REDUCE_SCATTER", 0);
NCCL_PARAM(SharpDisableAG, "SHARP_DISABLE_ALLGATHER", 0);
/*
 * Trace switch for SHARP collectives.
 *
 * The generated accessor ncclParamenableSharpTrace() is tested together with
 * cComm->rank == 0 in ncclSharpIallreduce, ncclSharpIallgather and
 * ncclSharpIreducescatter. When both conditions hold, an INFO(NCCL_COLL, ...)
 * line describing the operation's parameters is emitted ahead of the
 * sharp_coll_do_*_nb call in the BLOCKING==0 path.
 *
 * NOTE(review): presumably the trailing 0 is the default (tracing off) and the
 * value is read from an environment variable derived from "SHARP_COLL_TRACE"
 * -- confirm against the NCCL_PARAM macro definition.
 * NOTE(review): sibling parameters use UpperCamel names (e.g. SharpDisableAG);
 * this one is lowerCamel, which is why the accessor is spelled
 * ncclParamenableSharpTrace.
 */
NCCL_PARAM(enableSharpTrace, "SHARP_COLL_TRACE", 0);

enum ncclSharpRequestType {
NCCL_SHARP_REQ_SHARP_COLL,
Expand Down Expand Up @@ -500,6 +501,9 @@ ncclResult_t ncclSharpIallreduce(void* collComm, void* sendData, void* recvData,
reduce_spec.op = op_type;
reduce_spec.aggr_mode = SHARP_AGGREGATION_NONE;

if (ncclParamenableSharpTrace() && cComm->rank == 0)
INFO(NCCL_COLL, "Allreduce count:%d, op:%d dtype:%d ", count, op_type, sharp_type);

#if BLOCKING==0
if (SHARP_COLL_SUCCESS != sharp_coll_do_allreduce_nb(cComm->sharpCollComm, &reduce_spec, &req->sharpRequest)) {
WARN("SHARP allreduce failed\n");
Expand Down Expand Up @@ -546,6 +550,10 @@ ncclResult_t ncclSharpIallgather(void* collComm, void* sendData, int nRecvParts,
gather_spec.size = recvParts[0].size;
gather_spec.offset = windowOffset;

if (ncclParamenableSharpTrace() && cComm->rank == 0)
INFO(NCCL_COLL, "Allgather size:%lu bytesPerRank:%lu windowOffset:%lu windowBytes:%lu",
recvParts[0].size, bytesPerRank, windowOffset, windowBytes);

#if BLOCKING==0
if (SHARP_COLL_SUCCESS != sharp_coll_do_allgather_nb(cComm->sharpCollComm, &gather_spec, &req->sharpRequest)) {
WARN("SHARP Allgather failed\n");
Expand Down Expand Up @@ -611,6 +619,10 @@ ncclResult_t ncclSharpIreducescatter(void* collComm, int nSendParts, ncclNetSGE_
reduce_spec.op = op_type;
reduce_spec.aggr_mode = SHARP_AGGREGATION_NONE;

if (ncclParamenableSharpTrace() && cComm->rank == 0)
INFO(NCCL_COLL, "ReduceScatter bytesPerRank:%lu windowOffset:%lu windowBytes:%lu op_type:%d dtype:%d",
bytesPerRank, windowOffset, windowBytes, op_type, sharp_type);

#if BLOCKING==0
if (SHARP_COLL_SUCCESS != sharp_coll_do_reduce_scatter_nb(cComm->sharpCollComm, &reduce_spec, &req->sharpRequest)) {
WARN("SHARP reduce_scatter failed\n");
Expand Down

0 comments on commit 7e482ec

Please sign in to comment.