From 6cc747a67f7b1ecf6811a2e0f284a8ca7e975450 Mon Sep 17 00:00:00 2001
From: Darshan Sanghani
Date: Mon, 25 Nov 2024 14:47:34 -0800
Subject: [PATCH] Undo revert of the PR #1014

Summary:
Reverts the rollback D66458621.

Reverting only the Kineto rollback would have partially solved the issue,
since this part controls transmission of the metadata to the corresponding
kernel. However, record_param_comms in PyTorch is the real issue: it was
still recording this metadata and would still produce an invalid trace JSON
when working with >30 GPUs (our truncation case).

Differential Revision: D66475394
---
 libkineto/src/output_json.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/libkineto/src/output_json.cpp b/libkineto/src/output_json.cpp
index ca1bbab00..5a343f5e5 100644
--- a/libkineto/src/output_json.cpp
+++ b/libkineto/src/output_json.cpp
@@ -38,6 +38,8 @@ static constexpr const char* kOutSplit = "Out split size";
 static constexpr const char* kProcessGroupName = "Process Group Name";
 static constexpr const char* kProcessGroupDesc = "Process Group Description";
 static constexpr const char* kGroupRanks = "Process Group Ranks";
+static constexpr const char* kInTensorsStart = "Input Tensors start";
+static constexpr const char* kOutTensorsStart = "Output Tensors start";
 static constexpr const char* kRank = "Rank";
 static constexpr const char* kP2pSrc = "Src Rank";
 static constexpr const char* kP2pDst = "Dst Rank";
@@ -419,6 +421,24 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) {
           kDtype, dtype));
     }
+    const auto& input_tensor_starts =
+        collectiveRecord->getMetadataValue(kInTensorsStart);
+    const auto output_tensor_starts =
+        collectiveRecord->getMetadataValue(kOutTensorsStart);
+    if (!input_tensor_starts.empty()) {
+      if (!arg_values.empty()) {
+        arg_values.append(",");
+      }
+      arg_values.append(
+          fmt::format(" \"{}\": {}", kInTensorsStart, input_tensor_starts));
+    }
+    if (!output_tensor_starts.empty()) {
+      if (!arg_values.empty()) {
+        arg_values.append(",");
+      }
+      arg_values.append(
+          fmt::format(" \"{}\": {}", kOutTensorsStart, output_tensor_starts));
+    }
 
     // In/out split size are valid for all_to_all
     const auto& inSplitSize = collectiveRecord->getMetadataValue(kInSplit);
     const auto& outSplitSize = collectiveRecord->getMetadataValue(kOutSplit);
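
Note (illustration only, not part of the patch): a minimal C++ sketch of the failure mode described in the summary, using made-up metadata values. If the string recorded by record_param_comms for a per-tensor list is truncated (as can happen with >30 GPUs), splicing it into arg_values leaves an unterminated JSON value, so the emitted Chrome trace JSON no longer parses.

// Illustration only; the metadata strings below are hypothetical examples.
#include <fmt/format.h>
#include <iostream>
#include <string>

int main() {
  // Complete metadata value: a JSON array of tensor start offsets.
  std::string full = "[0, 1024, 2048, 3072]";
  // Truncated metadata value, e.g. cut off mid-array for a large GPU count.
  std::string truncated = "[0, 1024, 20";

  // Mirrors how the hunk above splices a metadata value into arg_values.
  std::string ok = fmt::format(" \"Input Tensors start\": {}", full);
  std::string bad = fmt::format(" \"Input Tensors start\": {}", truncated);

  std::cout << "{" << ok << "}\n";   // valid JSON object
  std::cout << "{" << bad << "}\n";  // unterminated array -> invalid JSON
  return 0;
}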