Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly manage JNI resources in JVMTI wallclock sampler #168

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
227 changes: 197 additions & 30 deletions ddprof-lib/src/main/cpp/wallClock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,141 @@
#include "vmStructs.h"
#include <math.h>
#include <random>
#include <cstdlib>

// RAII wrapper around jvmtiEnv::GetAllThreads().
// Owns both the JVMTI-allocated thread array and the JNI local references
// it contains; everything is released when the wrapper leaves scope, which
// matters here because the sampler loop never returns to Java and local
// references would otherwise accumulate.
class JVMTIThreads {
private:
    jvmtiEnv* _jvmti;       // JVMTI environment used for Deallocate()
    JNIEnv* _jni;           // JNI environment used to drop local references
    jthread* _threads_ptr;  // JVMTI-allocated array of thread local refs
    jint _threads_count;    // number of valid entries in _threads_ptr

    // Releases the owned local references and the JVMTI-allocated array.
    // Safe to call on an empty or moved-from instance. Shared by the
    // destructor and the move-assignment operator so neither needs the
    // undefined-behavior "explicit destructor call then reassign" pattern.
    void release() {
        if (_threads_ptr != nullptr) {
            // _jni and _jvmti are guaranteed non-null whenever _threads_ptr
            // is set (see constructor).
            for (jint i = 0; i < _threads_count; ++i) {
                if (_threads_ptr[i] != nullptr) {
                    _jni->DeleteLocalRef(_threads_ptr[i]);
                }
            }
            _jvmti->Deallocate(reinterpret_cast<unsigned char*>(_threads_ptr));
            _threads_ptr = nullptr;
            _threads_count = 0;
        }
    }

public:
    // Snapshots all live Java threads via GetAllThreads(). On failure, or
    // when either environment pointer is null, the wrapper is left empty
    // (count() == 0) rather than holding a partially initialized state.
    // The null-jni guard prevents the destructor from dereferencing a null
    // JNIEnv while deleting local references.
    JVMTIThreads(jvmtiEnv* jvmti, JNIEnv* jni)
        : _jvmti(jvmti), _jni(jni), _threads_ptr(nullptr), _threads_count(0) {
        if (_jvmti == nullptr || _jni == nullptr ||
            _jvmti->GetAllThreads(&_threads_count, &_threads_ptr) != JVMTI_ERROR_NONE) {
            _threads_count = 0;
            _threads_ptr = nullptr;
        }
    }

    ~JVMTIThreads() {
        release();
    }

    // Owning type: copying would double-release the references and the
    // array, so copies are disabled.
    JVMTIThreads(const JVMTIThreads&) = delete;
    JVMTIThreads& operator=(const JVMTIThreads&) = delete;

    // Moves transfer ownership and leave the source empty. Keeping the
    // source's environment pointers is harmless because release() only
    // acts when _threads_ptr is non-null.
    JVMTIThreads(JVMTIThreads&& other) noexcept
        : _jvmti(other._jvmti), _jni(other._jni),
          _threads_ptr(other._threads_ptr), _threads_count(other._threads_count) {
        other._threads_ptr = nullptr;
        other._threads_count = 0;
    }

    JVMTIThreads& operator=(JVMTIThreads&& other) noexcept {
        if (this != &other) {
            // Release current resources first, then steal the source's.
            // (No explicit destructor call: destroying *this and then
            // assigning to the destroyed object is undefined behavior.)
            release();
            _jvmti = other._jvmti;
            _jni = other._jni;
            _threads_ptr = other._threads_ptr;
            _threads_count = other._threads_count;
            other._threads_ptr = nullptr;
            other._threads_count = 0;
        }
        return *this;
    }

    // Number of threads captured (0 if GetAllThreads failed).
    jint count() const {
        return _threads_count;
    }

    // Bounds-checked access; returns nullptr for out-of-range indices.
    jthread operator[](jint index) const noexcept {
        if (index < 0 || index >= _threads_count) {
            return nullptr;
        }
        return _threads_ptr[index];
    }
};

// RAII helper that converts a (weak) global JNI reference into a local
// reference: the global reference is deleted immediately in the
// constructor, and the resulting local reference is deleted when this
// object leaves scope.
template<typename T>
class MoveToLocal {
private:
    JNIEnv* _jni; // JNI environment; may be nullptr (then no ref is held)
    T _ref;       // owned local reference, or nullptr

    // Deletes the owned local reference, if any. Shared by the destructor
    // and the move-assignment operator.
    void release() {
        if (_jni != nullptr && _ref != nullptr) {
            _jni->DeleteLocalRef(_ref);
            _ref = nullptr;
        }
    }

public:
    // Takes ownership of 'global_ref'. If it is a weak global reference
    // whose referent has already been collected, NewLocalRef() yields
    // nullptr and local() reports that to the caller.
    // BUGFIX: _ref is now always initialized — the original left it
    // indeterminate when jni or global_ref was null, so the destructor
    // could call DeleteLocalRef on a garbage pointer.
    MoveToLocal(JNIEnv* jni, T global_ref, bool is_weak) : _jni(jni), _ref(nullptr) {
        if (_jni != nullptr && global_ref != nullptr) {
            _ref = static_cast<T>(_jni->NewLocalRef(global_ref));
            if (is_weak) {
                _jni->DeleteWeakGlobalRef(global_ref);
            } else {
                _jni->DeleteGlobalRef(global_ref);
            }
        }
    }

    ~MoveToLocal() {
        release();
    }

    // Owning type: copying would double-delete the local reference.
    MoveToLocal(const MoveToLocal&) = delete;
    MoveToLocal& operator=(const MoveToLocal&) = delete;

    // Moves transfer ownership and leave the source empty.
    MoveToLocal(MoveToLocal&& other) noexcept
        : _jni(other._jni), _ref(other._ref) {
        other._jni = nullptr;
        other._ref = nullptr;
    }

    MoveToLocal& operator=(MoveToLocal&& other) noexcept {
        if (this != &other) {
            // Release current resources first, then steal the source's.
            // (No explicit destructor call: destroying *this and then
            // assigning to the destroyed object is undefined behavior.)
            release();
            _jni = other._jni;
            _ref = other._ref;
            other._jni = nullptr;
            other._ref = nullptr;
        }
        return *this;
    }

    // The owned local reference; nullptr if construction failed or the
    // weak global reference had already been collected.
    T local() {
        return _ref;
    }
};

std::atomic<bool> BaseWallClock::_enabled{false};

Expand Down Expand Up @@ -154,56 +289,89 @@ void WallClockASGCT::initialize(Arguments& args) {
}

void WallClockJVMTI::timerLoop() {
// Check for enablement before attaching/detaching the current thread
// Check for enablement before attaching/detaching the current thread
if (!isEnabled()) {
return;
}
// Attach to JVM as the first step
VM::attachThread("Datadog Profiler Wallclock Sampler");
auto collectThreads = [&](std::vector<ThreadEntry>& threads) {
jvmtiEnv* jvmti = VM::jvmti();
if (jvmti == nullptr) {
return;
}
JNIEnv* jni = VM::jni();

jint threads_count = 0;
jthread* threads_ptr = nullptr;
jvmti->GetAllThreads(&threads_count, &threads_ptr);

bool do_filter = Profiler::instance()->threadFilter()->enabled();
int self = OS::threadId();
VM::attachThread("Datadog Profiler Wallclock Sampler");
auto collectThreads = [&](std::vector<ThreadEntry>& threads) {
jvmtiEnv* jvmti = VM::jvmti();
if (jvmti == nullptr) {
return;
}
JNIEnv* jni = VM::jni();

for (int i = 0; i < threads_count; i++) {
jthread thread = threads_ptr[i];
if (thread != nullptr) {
VMThread* nThread = VMThread::fromJavaThread(jni, thread);
if (nThread == nullptr) {
continue;
}
int tid = nThread->osThreadId();
if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) {
threads.push_back({nThread, thread});
// When we collect thread list via JVMTI the threads will be effectively local references
// These local references would be automatically cleaned up once we get back from the native call to Java
// But here we are in an endless loop, never leaving the native call and as such all those local references
// would just keep on piling up
// Therefore, we have this neat RAII JVMTIThreads type which will take care of releasing the local references
// once we are out of scope here
JVMTIThreads thread_array(jvmti, jni);
bool do_filter = Profiler::instance()->threadFilter()->enabled();
int self = OS::threadId();
for (int i = 0; i < thread_array.count(); i++) {
jthread thread = thread_array[i];
if (thread != nullptr) {
VMThread* nThread = VMThread::fromJavaThread(jni, thread);
if (nThread == nullptr) {
continue;
}
jint thread_state;
if (jvmti->GetThreadState(thread, &thread_state) == JVMTI_ERROR_NONE &&
(thread_state & JVMTI_THREAD_STATE_TERMINATED) == 0) {
// It might not always be possible to resolve native thread from a java thread
// eg. when we are trying that very early in the thread startup
// In that case we would return -1 as the native tid and just skip that thread
int tid = VMThread::nativeThreadId(jni, thread);
if (tid != -1 && tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) {
// if NewWeakGlobalRef fails it will return 'nullptr' and we will skip it when the thread list is processed
threads.push_back({nThread, jni->NewWeakGlobalRef(thread)});
}
}
}
jvmti->Deallocate((unsigned char*)threads_ptr);
};
}
}
};

auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) {
static jint max_stack_depth = (jint)Profiler::instance()->max_stack_depth();
static jvmtiFrameInfo* frame_buffer = new jvmtiFrameInfo[max_stack_depth];
static jvmtiEnv* jvmti = VM::jvmti();
static JNIEnv* jni = VM::jni();

int num_frames = 0;
jvmtiError err = jvmti->GetStackTrace(thread_entry.java, 0, max_stack_depth, frame_buffer, &num_frames);
if (thread_entry.java_ref == nullptr) {
num_failures++;
return false;
}

MoveToLocal<jthread> mtl(jni, thread_entry.java_ref, true);
jthread thread = mtl.local();
if (thread == nullptr) {
num_failures++;
return false;
}

jint thread_state;
jvmtiError state_err = jvmti->GetThreadState(thread, &thread_state);
if (state_err != JVMTI_ERROR_NONE || (thread_state & JVMTI_THREAD_STATE_TERMINATED) != 0) {
num_failures++;
return false;
}
jvmtiError err = jvmti->GetStackTrace(thread, 0, max_stack_depth, frame_buffer, &num_frames);

if (err != JVMTI_ERROR_NONE) {
num_failures++;
if (err == JVMTI_ERROR_THREAD_NOT_ALIVE) {
threads_already_exited++;
}
return false;
}
if (num_frames == 0) {
// some JVMTI attached threads are Java-like but have no stack; we can just ignore them
return true;
}
ExecutionEvent event;
VMThread* vm_thread = thread_entry.native;
int raw_thread_state = vm_thread->state();
Expand Down Expand Up @@ -267,6 +435,5 @@ void WallClockASGCT::timerLoop() {
}
return true;
};

timerLoopCommon<int>(collectThreads, sampleThreads, _reservoir_size, _interval);
}
72 changes: 40 additions & 32 deletions ddprof-lib/src/main/cpp/wallClock.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ class BaseWallClock : public Engine {
// Profiler::recordSample().
int _reservoir_size;

pthread_t _thread;
virtual void timerLoop() = 0;
virtual void initialize(Arguments& args) {};
pthread_t _thread;
virtual void timerLoop() = 0;
virtual void initialize(Arguments& args) {};

static void *threadEntry(void *wall_clock) {
((BaseWallClock *)wall_clock)->timerLoop();
Expand Down Expand Up @@ -76,38 +76,46 @@ class BaseWallClock : public Engine {

while (_running.load(std::memory_order_relaxed)) {
collectThreads(threads);
int size = threads.size();
if (threads.size() > 0) {
int num_failures = 0;
int threads_already_exited = 0;
int permission_denied = 0;
std::vector<ThreadType> sample = reservoir.sample(threads);
for (ThreadType thread : sample) {
if (!sampleThreads(thread, num_failures, threads_already_exited, permission_denied)) {
continue;
}
}

int num_failures = 0;
int threads_already_exited = 0;
int permission_denied = 0;
std::vector<ThreadType> sample = reservoir.sample(threads);
for (ThreadType thread : sample) {
if (!sampleThreads(thread, num_failures, threads_already_exited, permission_denied)) {
continue;
epoch.updateNumSamplableThreads(threads.size());
epoch.updateNumFailedSamples(num_failures);
epoch.updateNumSuccessfulSamples(sample.size() - num_failures);
epoch.updateNumExitedThreads(threads_already_exited);
epoch.updateNumPermissionDenied(permission_denied);
u64 endTime = TSC::ticks();
u64 duration = TSC::ticks_to_millis(endTime - startTime);
if (epoch.hasChanged() || duration >= 1000) {
epoch.endEpoch(duration);
Profiler::instance()->recordWallClockEpoch(self, &epoch);
epoch.newEpoch(endTime);
startTime = endTime;
} else {
epoch.clean();
}
}

epoch.updateNumSamplableThreads(threads.size());
epoch.updateNumFailedSamples(num_failures);
epoch.updateNumSuccessfulSamples(sample.size() - num_failures);
epoch.updateNumExitedThreads(threads_already_exited);
epoch.updateNumPermissionDenied(permission_denied);
u64 endTime = TSC::ticks();
u64 duration = TSC::ticks_to_millis(endTime - startTime);
if (epoch.hasChanged() || duration >= 1000) {
epoch.endEpoch(duration);
Profiler::instance()->recordWallClockEpoch(self, &epoch);
epoch.newEpoch(endTime);
startTime = endTime;
} else {
epoch.clean();
threads.clear();
}

threads.clear();
// Get a random sleep duration
// clamp the random interval to <1,2N-1>
// the probability of clamping is extremely small, close to zero
OS::sleep(std::min(std::max((long int)1, static_cast<long int>(distribution(generator))), ((_interval * 2) - 1)));
// Restrict the random interval to <N/2,N+N/2>
// With the given parameters N/2 is 5 standard deviations from the mean ->
// the probability of falling outside of the given range is ~0.000058%
long limit = _interval / 2;
long int delay = _interval;
do {
delay = static_cast<long int>(distribution(generator));
} while (std::abs(delay - _interval) > limit);
OS::sleep(delay);
}
}

Expand Down Expand Up @@ -159,8 +167,8 @@ class WallClockJVMTI : public BaseWallClock {
void timerLoop() override;
public:
struct ThreadEntry {
VMThread* native;
jthread java;
VMThread* native;
jobject java_ref;
};
WallClockJVMTI() : BaseWallClock() {}
const char* name() override {
Expand Down
Loading