Skip to content

Commit 40b9db2

Browse files
sanrise authored and facebook-github-bot committed
Record comms input and output tensor information (#1014)
Summary: Copy the input and output tensor information from the NCCL metadata into the trace output. This allows easier analysis of kernel memory access patterns in downstream tools. Reviewed By: sraikund16. Differential Revision: D65785010.
1 parent 338140f commit 40b9db2

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

libkineto/src/output_json.cpp

+20
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ static constexpr const char* kOutSplit = "Out split size";
3838
static constexpr const char* kProcessGroupName = "Process Group Name";
3939
static constexpr const char* kProcessGroupDesc = "Process Group Description";
4040
static constexpr const char* kGroupRanks = "Process Group Ranks";
41+
static constexpr const char* kInTensorsStart = "Input Tensors start";
42+
static constexpr const char* kOutTensorsStart = "Output Tensors start";
4143
static constexpr const char* kRank = "Rank";
4244
static constexpr const char* kP2pSrc = "Src Rank";
4345
static constexpr const char* kP2pDst = "Dst Rank";
@@ -419,6 +421,24 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) {
419421
kDtype,
420422
dtype));
421423
}
424+
const auto& input_tensor_starts =
425+
collectiveRecord->getMetadataValue(kInTensorsStart);
426+
const auto output_tensor_starts =
427+
collectiveRecord->getMetadataValue(kOutTensorsStart);
428+
if (!input_tensor_starts.empty()) {
429+
if (!arg_values.empty()) {
430+
arg_values.append(",");
431+
}
432+
arg_values.append(
433+
fmt::format(" \"{}\": {}", kInTensorsStart, input_tensor_starts));
434+
}
435+
if (!output_tensor_starts.empty()) {
436+
if (!arg_values.empty()) {
437+
arg_values.append(",");
438+
}
439+
arg_values.append(
440+
fmt::format(" \"{}\": {}", kOutTensorsStart, output_tensor_starts));
441+
}
422442
// In/out split size are valid for all_to_all
423443
const auto& inSplitSize = collectiveRecord->getMetadataValue(kInSplit);
424444
const auto& outSplitSize = collectiveRecord->getMetadataValue(kOutSplit);

0 commit comments

Comments
 (0)