Skip to content

Commit

Permalink
fixes"
Browse files Browse the repository at this point in the history
  • Loading branch information
sraikund16 committed Oct 11, 2024
1 parent 70e1628 commit 00dcd6e
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 27 deletions.
51 changes: 28 additions & 23 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ std::ostream& operator<<(
}

// Destructor: if a trace-collection thread object exists and is still
// joinable, wait for it to finish before the profiler goes away.
// NOTE(review): this is a rendered diff whose +/- markers are missing. The
// two `collectTraceThread` lines appear to be the removed version and the two
// `collectTraceThread_` lines their replacement (member renamed with a
// trailing underscore) - confirm against the raw diff.
CuptiActivityProfiler::~CuptiActivityProfiler() {
if(collectTraceThread && collectTraceThread->joinable()) {
collectTraceThread->join();
if (collectTraceThread_ && collectTraceThread_->joinable()) {
collectTraceThread_->join();
}
}

Expand Down Expand Up @@ -1126,28 +1126,26 @@ void CuptiActivityProfiler::configure(
currentRunloopState_ = RunloopState::Warmup;
}

// Stops the libkineto client (if one is registered), records whether CUPTI
// collection stopped early, and then finalizes the trace under mutex_.
//
// collection_done: only gates the "Reached profile end time" log line here.
// now:             timestamp forwarded unchanged to stopTraceInternal().
//
// NOTE(review): this is a rendered diff whose +/- markers are missing, so the
// signature and each body statement appear twice (before and after what looks
// like a formatting-only change) - confirm against the raw diff.
void CuptiActivityProfiler::collectTrace(bool collection_done,
const std::chrono::time_point<std::chrono::system_clock> &now) {

if (libkineto::api().client()) {
libkineto::api().client()->stop();
}
void CuptiActivityProfiler::collectTrace(
bool collection_done,
const std::chrono::time_point<std::chrono::system_clock>& now) {
// Stop the client first; this happens before mutex_ is acquired below.
if (libkineto::api().client()) {
libkineto::api().client()->stop();
}

// Only compiled when a GPU tracing backend macro is defined: copy the
// stopCollection flag from cupti_ into ecs_.cupti_stopped_early.
#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER)
ecs_.cupti_stopped_early = cupti_.stopCollection;
ecs_.cupti_stopped_early = cupti_.stopCollection;
#endif // HAS_CUPTI || HAS_ROCTRACER

std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
VLOG_IF(0, collection_done) << "Reached profile end time";
UST_LOGGER_MARK_COMPLETED(kCollectionStage);
// The guard holds mutex_ from here until the function returns, covering
// stopTraceInternal(), the log statement and the stage-completed marker.
std::lock_guard<std::recursive_mutex> guard(mutex_);
stopTraceInternal(now);
VLOG_IF(0, collection_done) << "Reached profile end time";
UST_LOGGER_MARK_COMPLETED(kCollectionStage);
}

// Waits for the collectTrace thread (if one exists and is joinable) to finish
// and then releases the thread object, leaving the pointer null.
// NOTE(review): this is a rendered diff whose +/- markers are missing. The
// `collectTraceThread` lines appear to be the removed version and the
// `collectTraceThread_` lines the replacement - confirm against the raw diff.
void CuptiActivityProfiler::ensureCollectTraceDone() {
if (collectTraceThread && collectTraceThread->joinable()) {
// NOTE(review): this form holds mutex_ while joining. collectTrace() also
// locks mutex_ on the worker thread, so joining with the lock held could
// block forever; presumably that is why the replacement drops the guard.
std::lock_guard<std::recursive_mutex> guard(mutex_);
collectTraceThread->join();
collectTraceThread.reset(nullptr);
if (collectTraceThread_ && collectTraceThread_->joinable()) {
collectTraceThread_->join();
collectTraceThread_.reset(nullptr);
}
}
void CuptiActivityProfiler::toggleCollectionDynamic(const bool enable) {
Expand Down Expand Up @@ -1299,13 +1297,20 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
VLOG_IF(1, currentIter > 0)
<< "This state change was invoked by application's step() call";

// currentIter > 0 means this is an iteration-based collection, triggered by pytorch main thread,
// it should be executed in another thread in case pytorch main thread is blocked
// currentIter > 0 means this is an iteration-based collection,
// triggered by pytorch main thread, it should be executed in another
// thread in case pytorch main thread is blocked
if (currentIter > 0) {
// if collectTraceThread is already running, there's no need to execute collectTrace twice.
if(!collectTraceThread){
// if collectTraceThread_ is already running, there's no need to
// execute collectTrace twice.
LOG(WARNING) << "LAUNCHING THREAD FOR collectTrace()";
if (!collectTraceThread_) {
std::lock_guard<std::recursive_mutex> guard(mutex_);
collectTraceThread = std::make_unique<std::thread>(&CuptiActivityProfiler::collectTrace, this, collection_done, now);
collectTraceThread_ = std::make_unique<std::thread>(
&CuptiActivityProfiler::collectTrace,
this,
collection_done,
now);
}
break;
}
Expand Down
9 changes: 6 additions & 3 deletions libkineto/src/CuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ class CuptiActivityProfiler {
}

// Collect CPU and GPU traces
// collectionDone: whether the collection is considered complete.
// now:            time point passed in by the caller.
// NOTE(review): the one-line and three-line declarations below look like the
// before/after forms of the same member in this diff view (the +/- markers
// are missing) - confirm against the raw diff.
void collectTrace(bool collectionDone, const std::chrono::time_point<std::chrono::system_clock>& now );
void collectTrace(
bool collectionDone,
const std::chrono::time_point<std::chrono::system_clock>& now);

// Ensure collectTrace is done
void ensureCollectTraceDone();
Expand Down Expand Up @@ -489,8 +491,9 @@ class CuptiActivityProfiler {
std::recursive_mutex mutex_;

// Add a thread to collect both cpu and gpu traces in case torch main thread
// is blocked when profiling by iterations is enabled. Issue #953 shows details.
// NOTE(review): the un-suffixed declaration below appears to be the removed
// line in this diff view; the `collectTraceThread_` declaration replaces it.
std::unique_ptr<std::thread> collectTraceThread;
// is blocked when profiling by iterations is enabled. Issue #953 shows
// details.
// Explicitly initialized to null, i.e. no collection thread exists until one
// is created.
std::unique_ptr<std::thread> collectTraceThread_{nullptr};

// Runloop phase
std::atomic<RunloopState> currentRunloopState_{RunloopState::WaitForRequest};
Expand Down
2 changes: 1 addition & 1 deletion libkineto/test/CuptiActivityProfilerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,8 @@ TEST(CuptiActivityProfiler, AsyncTraceUsingIter) {
EXPECT_TRUE(profiler.isActive());

auto nextnext = next + milliseconds(1000);

profiler.performRunLoopStep(nextnext, nextnext);
profiler.ensureCollectTraceDone();
profiler.performRunLoopStep(nextnext, nextnext);

// Assert that tracing has completed
Expand Down

0 comments on commit 00dcd6e

Please sign in to comment.