diff --git a/libkineto/src/CuptiCallbackApi.cpp b/libkineto/src/CuptiCallbackApi.cpp index 28fc27d44..4aba00dae 100644 --- a/libkineto/src/CuptiCallbackApi.cpp +++ b/libkineto/src/CuptiCallbackApi.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #ifdef HAS_CUPTI #include "cupti_call.h" @@ -32,13 +31,6 @@ constexpr size_t MAX_CB_FNS_PER_CB = 8; // is enabled, not a specific cbid. constexpr uint32_t MAX_CUPTI_CALLBACK_ID_ALL = 0xffffffff; -// Reader Writer lock types -using ReaderWriterLock = std::shared_timed_mutex; -using ReaderLockGuard = std::shared_lock; -using WriteLockGuard = std::unique_lock; - -static ReaderWriterLock callbackLock_; - /* Callback Table : * Overall goal of the design is to optimize the lookup of function * pointers. The table is structured at two levels and the leaf diff --git a/libkineto/src/CuptiCallbackApi.h b/libkineto/src/CuptiCallbackApi.h index 59858aa7a..f267b34f6 100644 --- a/libkineto/src/CuptiCallbackApi.h +++ b/libkineto/src/CuptiCallbackApi.h @@ -16,6 +16,7 @@ #include #include #include +#include #include // TODO(T90238193) @@ -143,6 +144,12 @@ class CuptiCallbackApi { // As an implementation detail, cbid == 0xffffffff means enable the domain. std::set> enabledCallbacks_; + + // Reader Writer lock types + using ReaderWriterLock = std::shared_timed_mutex; + using ReaderLockGuard = std::shared_lock; + using WriteLockGuard = std::unique_lock; + ReaderWriterLock callbackLock_; #ifdef HAS_CUPTI CUptiResult lastCuptiStatus_; CUpti_SubscriberHandle subscriber_ {0}; diff --git a/libkineto/src/CuptiRangeProfiler.cpp b/libkineto/src/CuptiRangeProfiler.cpp index c26b660fd..ee6a14c16 100644 --- a/libkineto/src/CuptiRangeProfiler.cpp +++ b/libkineto/src/CuptiRangeProfiler.cpp @@ -109,14 +109,14 @@ CuptiRangeProfilerSession::CuptiRangeProfilerSession( LOG(INFO) << "\t" << m; } - CuptiRangeProfilerOptions opts{ - .metricNames = cupti_metrics, - .deviceId = 0, - .maxRanges = max_ranges, - .numNestingLevels = 1, - .cuContext = nullptr, - .unitTest = false}; - + CuptiRangeProfilerOptions opts; + opts.metricNames = cupti_metrics; + opts.deviceId = 0; + opts.maxRanges = max_ranges; + opts.numNestingLevels = 1; + opts.cuContext = nullptr; + opts.unitTest = false; + for (auto device_id : CuptiRBProfilerSession::getActiveDevices()) { LOG(INFO) << "Init CUPTI range profiler on gpu = " << device_id << " max ranges = " << max_ranges; diff --git a/libkineto/src/CuptiRangeProfilerConfig.cpp b/libkineto/src/CuptiRangeProfilerConfig.cpp index 3ec272f8c..9f778e023 100644 --- a/libkineto/src/CuptiRangeProfilerConfig.cpp +++ b/libkineto/src/CuptiRangeProfilerConfig.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include