From 794e322baa778a4f246a37d7ce230687eb540ae0 Mon Sep 17 00:00:00 2001 From: "augusto.yjh" Date: Fri, 26 Jul 2024 17:51:20 +0800 Subject: [PATCH 1/9] make CollectTrace for profiling by iteration async --- libkineto/src/CuptiActivityProfiler.cpp | 39 +++++++++++++++++++++++++ libkineto/src/CuptiActivityProfiler.h | 4 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 1509de00f..854f5ec61 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -191,6 +191,17 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo return oss; } +CuptiActivityProfiler::~CuptiActivityProfiler() { + if(stopByIterThread) { + std::lock_guard guard(mutex_); + if (stopByIterThread) { + stopByIterThread->join(); + delete stopByIterThread; + stopByIterThread = nullptr; + } + } +} + void CuptiActivityProfiler::transferCpuTrace( std::unique_ptr cpuTrace) { std::lock_guard guard(mutex_); @@ -1219,6 +1230,24 @@ const time_point CuptiActivityProfiler::performRunLoopStep( LOG(INFO) << "Tracing complete."; VLOG_IF(1, currentIter > 0) << "This state change was invoked by application's step() call"; + if (currentIter > 0) { + if (!stopByIterThread) { + std::lock_guard guard(mutex_); + if (!stopByIterThread) { + stopByIterThread = new std::thread([collection_done, this, now](){ + if (libkineto::api().client()) { + libkineto::api().client()->stop(); + } + std::lock_guard guard(mutex_); + stopTraceInternal(now); + VLOG_IF(0, collection_done) << "Reached profile end time"; + UST_LOGGER_MARK_COMPLETED(kCollectionStage); + }); + } + } + break; + } + if (libkineto::api().client()) { libkineto::api().client()->stop(); } @@ -1252,6 +1281,16 @@ const time_point CuptiActivityProfiler::performRunLoopStep( if (currentIter >= 0) { return new_wakeup_time; } + + if (stopByIterThread) { + std::lock_guard guard(mutex_); + if (stopByIterThread) { + stopByIterThread->join(); + delete stopByIterThread; + stopByIterThread = nullptr; + } + } + // FIXME: Probably want to allow interruption here // for quickly handling trace request via synchronous API std::lock_guard guard(mutex_); diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index ff8c70d6b..eb6a62695 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -116,7 +116,7 @@ class CuptiActivityProfiler { CuptiActivityProfiler(RoctracerActivityApi& rai, bool cpuOnly); CuptiActivityProfiler(const CuptiActivityProfiler&) = delete; CuptiActivityProfiler& operator=(const CuptiActivityProfiler&) = delete; - + ~CuptiActivityProfiler(); bool isActive() const { return currentRunloopState_ != RunloopState::WaitForRequest; } @@ -474,6 +474,8 @@ class CuptiActivityProfiler { // Mutex to protect non-atomic access to below state std::mutex mutex_; + std::thread * stopByIterThread = nullptr; + // Runloop phase std::atomic currentRunloopState_{RunloopState::WaitForRequest}; From 44ae7fb4d672390d06811d92336ecb7ffa3bd175 Mon Sep 17 00:00:00 2001 From: "augusto.yjh" Date: Tue, 6 Aug 2024 16:43:07 +0800 Subject: [PATCH 2/9] add method collectTrace to collect both cpu and gpu traces --- libkineto/src/CuptiActivityProfiler.cpp | 73 ++++++++++--------------- libkineto/src/CuptiActivityProfiler.h | 7 ++- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 854f5ec61..c63e8e534 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -192,13 +192,8 @@ std::ostream& operator<<(std::ostream& oss, const CuptiActivityProfiler::ErrorCo } CuptiActivityProfiler::~CuptiActivityProfiler() { - if(stopByIterThread) { - std::lock_guard guard(mutex_); - if (stopByIterThread) { - stopByIterThread->join(); - delete stopByIterThread; - stopByIterThread = nullptr; - } + if(collectTraceThread && collectTraceThread->joinable()) { + collectTraceThread->join(); } } @@ -1084,6 +1079,23 @@ void CuptiActivityProfiler::configure( currentRunloopState_ = RunloopState::Warmup; } +void CuptiActivityProfiler::collectTrace(bool collection_done, + const std::chrono::time_point &now) { + + if (libkineto::api().client()) { + libkineto::api().client()->stop(); + } + +#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) + ecs_.cupti_stopped_early = cupti_.stopCollection; +#endif // HAS_CUPTI || HAS_ROCTRACER + + std::lock_guard guard(mutex_); + stopTraceInternal(now); + VLOG_IF(0, collection_done) << "Reached profile end time"; + UST_LOGGER_MARK_COMPLETED(kCollectionStage); +} + void CuptiActivityProfiler::toggleCollectionDynamic(const bool enable){ #ifdef HAS_CUPTI if (enable) { @@ -1230,39 +1242,14 @@ const time_point CuptiActivityProfiler::performRunLoopStep( LOG(INFO) << "Tracing complete."; VLOG_IF(1, currentIter > 0) << "This state change was invoked by application's step() call"; - if (currentIter > 0) { - if (!stopByIterThread) { - std::lock_guard guard(mutex_); - if (!stopByIterThread) { - stopByIterThread = new std::thread([collection_done, this, now](){ - if (libkineto::api().client()) { - libkineto::api().client()->stop(); - } - std::lock_guard guard(mutex_); - stopTraceInternal(now); - VLOG_IF(0, collection_done) << "Reached profile end time"; - UST_LOGGER_MARK_COMPLETED(kCollectionStage); - }); - } - } + // currentIter > 0 means this is an iteration-based collection, triggered by pytorch main thread, + // it should be executed in another thread in case pytorch main thread is blocked + if (currentIter > 0 && !collectTraceThread) { + std::lock_guard guard(mutex_); + collectTraceThread = std::make_unique(&CuptiActivityProfiler::collectTrace, this, collection_done, now); break; } - - if (libkineto::api().client()) { - libkineto::api().client()->stop(); - } - -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - if (cupti_.stopCollection) { - ecs_.cupti_stopped_early = cupti_.stopCollection; - LOG(ERROR) << "State: CollectTrace stopped by CUPTI. (Buffer size configured is " << config_->activitiesMaxGpuBufferSize() / 1024 / 1024 << "MB)"; - } -#endif // HAS_CUPTI || HAS_ROCTRACER - - std::lock_guard guard(mutex_); - stopTraceInternal(now); - VLOG_IF(0, collection_done) << "Reached profile end time"; - UST_LOGGER_MARK_COMPLETED(kCollectionStage); + collectTrace(collection_done, now); } else if (derivedConfig_->isProfilingByIteration()) { // nothing to do here } else if (now < derivedConfig_->profileEndTime() && @@ -1282,13 +1269,11 @@ const time_point CuptiActivityProfiler::performRunLoopStep( return new_wakeup_time; } - if (stopByIterThread) { + // Before processing, we should wait for collectTrace thread to be done. + if (collectTraceThread && collectTraceThread->joinable()) { std::lock_guard guard(mutex_); - if (stopByIterThread) { - stopByIterThread->join(); - delete stopByIterThread; - stopByIterThread = nullptr; - } + collectTraceThread->join(); + collectTraceThread.reset(nullptr); } // FIXME: Probably want to allow interruption here diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index eb6a62695..2f7634eb9 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -164,6 +164,9 @@ class CuptiActivityProfiler { stopTraceInternal(now); } + // Collect CPU and GPU traces + void collectTrace(bool collectionDone, const std::chrono::time_point& now ); + // Process CPU and GPU traces void processTrace(ActivityLogger& logger) { std::lock_guard guard(mutex_); @@ -474,7 +477,9 @@ class CuptiActivityProfiler { // Mutex to protect non-atomic access to below state std::mutex mutex_; - std::thread * stopByIterThread = nullptr; + // Add a thread to collect both cpu and gpu traces in case torch main thread + // is blocked when profiling by iterations is enabled. Issue #953 shows details. + std::unique_ptr collectTraceThread; // Runloop phase std::atomic currentRunloopState_{RunloopState::WaitForRequest}; From d3396705481a2eecb8b97a216f0ec747db946f84 Mon Sep 17 00:00:00 2001 From: "augusto.yjh" Date: Tue, 6 Aug 2024 16:46:03 +0800 Subject: [PATCH 3/9] add function ensureCollectTraceDone to wait and cleanup collectTraceThread --- libkineto/src/CuptiActivityProfiler.cpp | 13 ++++++++----- libkineto/src/CuptiActivityProfiler.h | 2 ++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index c63e8e534..8a921f9dc 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1096,6 +1096,13 @@ void CuptiActivityProfiler::collectTrace(bool collection_done, UST_LOGGER_MARK_COMPLETED(kCollectionStage); } +void CuptiActivityProfiler::ensureCollectTraceDone() { + if (collectTraceThread && collectTraceThread->joinable()) { + std::lock_guard guard(mutex_); + collectTraceThread->join(); + collectTraceThread.reset(nullptr); + } +} void CuptiActivityProfiler::toggleCollectionDynamic(const bool enable){ #ifdef HAS_CUPTI if (enable) { @@ -1270,11 +1277,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( } // Before processing, we should wait for collectTrace thread to be done. - if (collectTraceThread && collectTraceThread->joinable()) { - std::lock_guard guard(mutex_); - collectTraceThread->join(); - collectTraceThread.reset(nullptr); - } + ensureCollectTraceDone(); // FIXME: Probably want to allow interruption here // for quickly handling trace request via synchronous API diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index 2f7634eb9..3a6a74ebd 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -167,6 +167,8 @@ class CuptiActivityProfiler { // Collect CPU and GPU traces void collectTrace(bool collectionDone, const std::chrono::time_point& now ); + // Ensure collectTrace is done + void ensureCollectTraceDone(); // Process CPU and GPU traces void processTrace(ActivityLogger& logger) { std::lock_guard guard(mutex_); From 7d5d14780c74288da668dac694ce9f9251992268 Mon Sep 17 00:00:00 2001 From: "augusto.yjh" Date: Wed, 7 Aug 2024 11:11:18 +0800 Subject: [PATCH 4/9] do not call collectTrace in main thread --- libkineto/src/CuptiActivityProfiler.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 8a921f9dc..4e3abca8b 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1251,9 +1251,12 @@ const time_point CuptiActivityProfiler::performRunLoopStep( // currentIter > 0 means this is an iteration-based collection, triggered by pytorch main thread, // it should be executed in another thread in case pytorch main thread is blocked - if (currentIter > 0 && !collectTraceThread) { - std::lock_guard guard(mutex_); - collectTraceThread = std::make_unique(&CuptiActivityProfiler::collectTrace, this, collection_done, now); + if (currentIter > 0) { + // if collectTraceThread is already running, there's no need to execute collectTrace twice. + if(!collectTraceThread){ + std::lock_guard guard(mutex_); + collectTraceThread = std::make_unique(&CuptiActivityProfiler::collectTrace, this, collection_done, now); + } break; } collectTrace(collection_done, now); From bb90e40c22bf1a69d7d6cdb4d167bcda46520a37 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Thu, 10 Oct 2024 17:29:14 -0700 Subject: [PATCH 5/9] variable name --- libkineto/src/CuptiActivityProfiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 55cf18660..779939518 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1169,7 +1169,7 @@ void CuptiActivityProfiler::toggleCollectionDynamic(const bool enable) { void CuptiActivityProfiler::startTraceInternal( const time_point& now) { - captu/WindowStartTime_ = libkineto::timeSinceEpoch(now); + captureWindowStartTime_ = libkineto::timeSinceEpoch(now); VLOG(0) << "Warmup -> CollectTrace"; for (auto& session : sessions_) { LOG(INFO) << "Starting child profiler session"; From 70e1628bad5631bcbba679a9cc6246aa564a09a5 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Thu, 10 Oct 2024 17:34:23 -0700 Subject: [PATCH 6/9] recursive mutex --- libkineto/src/CuptiActivityProfiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 779939518..76b64ca37 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1137,7 +1137,7 @@ void CuptiActivityProfiler::collectTrace(bool collection_done, ecs_.cupti_stopped_early = cupti_.stopCollection; #endif // HAS_CUPTI || HAS_ROCTRACER - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); stopTraceInternal(now); VLOG_IF(0, collection_done) << "Reached profile end time"; UST_LOGGER_MARK_COMPLETED(kCollectionStage); @@ -1145,7 +1145,7 @@ void CuptiActivityProfiler::collectTrace(bool collection_done, void CuptiActivityProfiler::ensureCollectTraceDone() { if (collectTraceThread && collectTraceThread->joinable()) { - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); collectTraceThread->join(); collectTraceThread.reset(nullptr); } @@ -1304,7 +1304,7 @@ const time_point CuptiActivityProfiler::performRunLoopStep( if (currentIter > 0) { // if collectTraceThread is already running, there's no need to execute collectTrace twice. if(!collectTraceThread){ - std::lock_guard guard(mutex_); + std::lock_guard guard(mutex_); collectTraceThread = std::make_unique(&CuptiActivityProfiler::collectTrace, this, collection_done, now); } break; From 00dcd6e54a101ab53417693d07cc25a895222d70 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Fri, 11 Oct 2024 14:03:35 -0700 Subject: [PATCH 7/9] fixes" --- libkineto/src/CuptiActivityProfiler.cpp | 51 +++++++++++--------- libkineto/src/CuptiActivityProfiler.h | 9 ++-- libkineto/test/CuptiActivityProfilerTest.cpp | 2 +- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 76b64ca37..807768f56 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -209,8 +209,8 @@ std::ostream& operator<<( } CuptiActivityProfiler::~CuptiActivityProfiler() { - if(collectTraceThread && collectTraceThread->joinable()) { - collectTraceThread->join(); + if (collectTraceThread_ && collectTraceThread_->joinable()) { + collectTraceThread_->join(); } } @@ -1126,28 +1126,26 @@ void CuptiActivityProfiler::configure( currentRunloopState_ = RunloopState::Warmup; } -void CuptiActivityProfiler::collectTrace(bool collection_done, - const std::chrono::time_point &now) { - - if (libkineto::api().client()) { - libkineto::api().client()->stop(); - } +void CuptiActivityProfiler::collectTrace( + bool collection_done, + const std::chrono::time_point& now) { + if (libkineto::api().client()) { + libkineto::api().client()->stop(); + } #if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - ecs_.cupti_stopped_early = cupti_.stopCollection; + ecs_.cupti_stopped_early = cupti_.stopCollection; #endif // HAS_CUPTI || HAS_ROCTRACER - - std::lock_guard guard(mutex_); - stopTraceInternal(now); - VLOG_IF(0, collection_done) << "Reached profile end time"; - UST_LOGGER_MARK_COMPLETED(kCollectionStage); + std::lock_guard guard(mutex_); + stopTraceInternal(now); + VLOG_IF(0, collection_done) << "Reached profile end time"; + UST_LOGGER_MARK_COMPLETED(kCollectionStage); } void CuptiActivityProfiler::ensureCollectTraceDone() { - if (collectTraceThread && collectTraceThread->joinable()) { - std::lock_guard guard(mutex_); - collectTraceThread->join(); - collectTraceThread.reset(nullptr); + if (collectTraceThread_ && collectTraceThread_->joinable()) { + collectTraceThread_->join(); + collectTraceThread_.reset(nullptr); } } void CuptiActivityProfiler::toggleCollectionDynamic(const bool enable) { @@ -1299,13 +1297,20 @@ const time_point CuptiActivityProfiler::performRunLoopStep( VLOG_IF(1, currentIter > 0) << "This state change was invoked by application's step() call"; - // currentIter > 0 means this is an iteration-based collection, triggered by pytorch main thread, - // it should be executed in another thread in case pytorch main thread is blocked + // currentIter > 0 means this is an iteration-based collection, + // triggered by pytorch main thread, it should be executed in another + // thread in case pytorch main thread is blocked if (currentIter > 0) { - // if collectTraceThread is already running, there's no need to execute collectTrace twice. - if(!collectTraceThread){ + // if collectTraceThread_ is already running, there's no need to + // execute collectTrace twice. + LOG(WARNING) << "LAUNCHING THREAD FOR collectTrace()"; + if (!collectTraceThread_) { std::lock_guard guard(mutex_); - collectTraceThread = std::make_unique(&CuptiActivityProfiler::collectTrace, this, collection_done, now); + collectTraceThread_ = std::make_unique( + &CuptiActivityProfiler::collectTrace, + this, + collection_done, + now); } break; } diff --git a/libkineto/src/CuptiActivityProfiler.h b/libkineto/src/CuptiActivityProfiler.h index d718ea508..d47bba33b 100644 --- a/libkineto/src/CuptiActivityProfiler.h +++ b/libkineto/src/CuptiActivityProfiler.h @@ -171,7 +171,9 @@ class CuptiActivityProfiler { } // Collect CPU and GPU traces - void collectTrace(bool collectionDone, const std::chrono::time_point& now ); + void collectTrace( + bool collectionDone, + const std::chrono::time_point& now); // Ensure collectTrace is done void ensureCollectTraceDone(); @@ -489,8 +491,9 @@ class CuptiActivityProfiler { std::recursive_mutex mutex_; // Add a thread to collect both cpu and gpu traces in case torch main thread - // is blocked when profiling by iterations is enabled. Issue #953 shows details. - std::unique_ptr collectTraceThread; + // is blocked when profiling by iterations is enabled. Issue #953 shows + // details. + std::unique_ptr collectTraceThread_{nullptr}; // Runloop phase std::atomic currentRunloopState_{RunloopState::WaitForRequest}; diff --git a/libkineto/test/CuptiActivityProfilerTest.cpp b/libkineto/test/CuptiActivityProfilerTest.cpp index eef9588d4..713a8b83b 100644 --- a/libkineto/test/CuptiActivityProfilerTest.cpp +++ b/libkineto/test/CuptiActivityProfilerTest.cpp @@ -443,8 +443,8 @@ TEST(CuptiActivityProfiler, AsyncTraceUsingIter) { EXPECT_TRUE(profiler.isActive()); auto nextnext = next + milliseconds(1000); - profiler.performRunLoopStep(nextnext, nextnext); + profiler.ensureCollectTraceDone(); profiler.performRunLoopStep(nextnext, nextnext); // Assert that tracing has completed From 5898b51fb9ec359b6b6f1371a911167d7da0f966 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Mon, 14 Oct 2024 14:41:14 -0700 Subject: [PATCH 8/9] add error logic --- libkineto/src/CuptiActivityProfiler.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 807768f56..702bff012 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1134,7 +1134,12 @@ void CuptiActivityProfiler::collectTrace( } #if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - ecs_.cupti_stopped_early = cupti_.stopCollection; + if (cupti_.stopCollection) { + ecs_.cupti_stopped_early = cupti_.stopCollection; + LOG(ERROR) + << "State: CollectTrace stopped by CUPTI. (Buffer size configured is " + << config_->activitiesMaxGpuBufferSize() / 1024 / 1024 << "MB)"; + } #endif // HAS_CUPTI || HAS_ROCTRACER std::lock_guard guard(mutex_); stopTraceInternal(now); From 833e61b5325d75741dfa8da0bf2fe7962938f789 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Tue, 15 Oct 2024 09:26:41 -0700 Subject: [PATCH 9/9] pr comments --- libkineto/src/CuptiActivityProfiler.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index 702bff012..0f742d4ee 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -1299,16 +1299,15 @@ const time_point CuptiActivityProfiler::performRunLoopStep( ) { // Update runloop state first to prevent further updates to shared state LOG(INFO) << "Tracing complete."; - VLOG_IF(1, currentIter > 0) + VLOG_IF(1, currentIter >= 0) << "This state change was invoked by application's step() call"; - // currentIter > 0 means this is an iteration-based collection, + // currentIter >= 0 means this is an iteration-based collection, // triggered by pytorch main thread, it should be executed in another // thread in case pytorch main thread is blocked - if (currentIter > 0) { + if (currentIter >= 0) { // if collectTraceThread_ is already running, there's no need to // execute collectTrace twice. - LOG(WARNING) << "LAUNCHING THREAD FOR collectTrace()"; if (!collectTraceThread_) { std::lock_guard guard(mutex_); collectTraceThread_ = std::make_unique(