From 62628a89ccc2e4df17d577ef1faef301b0092613 Mon Sep 17 00:00:00 2001
From: Richard Startin
Date: Mon, 11 Mar 2024 14:28:03 +0000
Subject: [PATCH] WIP: frame level sampling

---
 ddprof-lib/src/main/cpp/FrameSampler.cpp      | 78 ++++++++++++++++++
 ddprof-lib/src/main/cpp/FrameSampler.h        | 20 +++++
 ddprof-lib/src/main/cpp/arguments.cpp         | 14 +++-
 ddprof-lib/src/main/cpp/arguments.h           |  4 +-
 ddprof-lib/src/main/cpp/ctimer.h              |  3 +-
 ddprof-lib/src/main/cpp/ctimer_linux.cpp      |  9 +-
 ddprof-lib/src/main/cpp/event.h               | 19 +++++
 ddprof-lib/src/main/cpp/flightRecorder.cpp    | 36 ++++++++
 ddprof-lib/src/main/cpp/flightRecorder.h      |  4 +
 ddprof-lib/src/main/cpp/itimer.cpp            |  9 ++
 ddprof-lib/src/main/cpp/itimer.h              |  1 +
 ddprof-lib/src/main/cpp/jfrMetadata.cpp       | 17 ++++
 ddprof-lib/src/main/cpp/jfrMetadata.h         |  2 +
 ddprof-lib/src/main/cpp/perfEvents.h          |  1 +
 ddprof-lib/src/main/cpp/perfEvents_linux.cpp  |  6 ++
 ddprof-lib/src/main/cpp/profiler.cpp          | 27 ++++++
 ddprof-lib/src/main/cpp/profiler.h            |  2 +
 ddprof-lib/src/main/cpp/vmEntry.h             |  1 +
 ddprof-lib/src/main/cpp/vmStructs.cpp         |  4 +
 ddprof-lib/src/main/cpp/vmStructs.h           | 15 ++++
 .../profiler/cpu/FrameLevelCpuTest.java       | 81 ++++++++++++++++++
 21 files changed, 349 insertions(+), 4 deletions(-)
 create mode 100644 ddprof-lib/src/main/cpp/FrameSampler.cpp
 create mode 100644 ddprof-lib/src/main/cpp/FrameSampler.h
 create mode 100644 ddprof-test/src/test/java/com/datadoghq/profiler/cpu/FrameLevelCpuTest.java

diff --git a/ddprof-lib/src/main/cpp/FrameSampler.cpp b/ddprof-lib/src/main/cpp/FrameSampler.cpp
new file mode 100644
index 00000000..5940a7c5
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/FrameSampler.cpp
@@ -0,0 +1,78 @@
+#include "event.h"
+#include "FrameSampler.h"
+#include "profiler.h"
+#include "vmEntry.h"
+#include "stackFrame.h"
+
+FrameSampler* const FrameSampler::_instance = new FrameSampler();
+
+void FrameSampler::forget_sampled_methods() {
+    // TODO clear out methods seen in the last recording interval
+}
+
+// Emits the compiled code of nmethod as a CodeEvent so that sampled PC offsets
+// can be resolved against it. Runs inside the sampling signal handler.
+void FrameSampler::record_sampled_method(int tid, NMethod* nmethod) {
+    // TODO record once per method per recording interval,
+    // requires a signal safe concurrent set
+    u64 methodId = (u64) nmethod->method()->id();
+    int codeSize = nmethod->codeSize();
+    // we can't work with the code blob unless we know where it ends
+    // it is not null terminated so releasing a pointer to it as a
+    // c string is dangerous - calls to strlen may segfault etc.
+    if (codeSize < 0) {
+        // no diagnostics here: stream I/O is not async-signal-safe
+        return;
+    }
+    CodeEvent code;
+    code._id = methodId;
+    code._name = Profiler::instance()->stringLabelMap()->lookup(nmethod->name());
+    code._code_size = codeSize;
+    code._code = nmethod->code();
+    Profiler::instance()->recordCode(tid, &code);
+}
+
+// Samples the method executing at the interrupted PC and records a FrameEvent
+// carrying the PC's offset from the start of the method's code. Runs inside
+// the sampling signal handler, so it must not perform stream I/O.
+void FrameSampler::do_sample(int tid, u64 counter, void *ucontext) {
+    JNIEnv* jni = VM::jni();
+    if (jni == NULL) {
+        return; // not Java
+    }
+    StackFrame frame(ucontext);
+    uintptr_t pc = frame.pc();
+    if (!CodeHeap::contains((const void*) pc)) {
+        // interrupted outside the code heap: fall back to the last Java PC
+        VMThread* vmThread = VMThread::current();
+        if (vmThread == NULL) {
+            return;
+        }
+        pc = vmThread->lastJavaPC();
+    }
+    if (pc == 0) {
+        return;
+    }
+    NMethod* nm = CodeHeap::findNMethod((const void*) pc);
+    if (nm == NULL || !nm->isNMethod() || !nm->isFrameCompleteAt((const void*) pc)) {
+        return;
+    }
+    uintptr_t codeStart = (uintptr_t) nm->code();
+    if (pc < codeStart) {
+        return; // PC is not inside this method's code
+    }
+    int compilationTier = nm->level();
+    jmethodID methodID = nm->method()->id();
+    record_sampled_method(tid, nm);
+    // offset = pc - code start; the reverse subtraction wraps around as unsigned
+    FrameEvent event(methodID, (u64) (pc - codeStart), compilationTier);
+    Profiler::instance()->recordFrameSample(counter, tid, &event);
+}
+
+void FrameSampler::sample(int tid, u64 counter, void *ucontext) {
+    _instance->do_sample(tid, counter, ucontext);
+}
+
+void FrameSampler::clear() {
+    _instance->forget_sampled_methods();
+}
diff --git a/ddprof-lib/src/main/cpp/FrameSampler.h b/ddprof-lib/src/main/cpp/FrameSampler.h
new file mode 100644
index 00000000..59a3814a
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/FrameSampler.h
@@ -0,0 +1,20 @@
+#ifndef JAVA_PROFILER_FRAMESAMPLER_H
+#define JAVA_PROFILER_FRAMESAMPLER_H
+
+#include "arch.h"
+#include "vmStructs.h"
+
+class FrameSampler {
+private:
+    static FrameSampler* const _instance;
+    void do_sample(int tid, u64 counter, void* ucontext);
+    void forget_sampled_methods();
+    void record_sampled_method(int tid, NMethod* nmethod);
+public:
+    static void sample(int tid, u64 counter, void* ucontext);
+    static void clear();
+
+};
+
+
+#endif //JAVA_PROFILER_FRAMESAMPLER_H
diff --git a/ddprof-lib/src/main/cpp/arguments.cpp b/ddprof-lib/src/main/cpp/arguments.cpp
index f29c34fc..3b545ad9 100644
--- a/ddprof-lib/src/main/cpp/arguments.cpp
+++ b/ddprof-lib/src/main/cpp/arguments.cpp
@@ -23,7 +23,6 @@
 #include
 #include "arguments.h"
 
-
 // Predefined value that denotes successful operation
 const Error Error::OK(NULL);
 
@@ -187,6 +186,18 @@ Error Arguments::parse(const char* args) {
                 _event = value;
             }
 
+            CASE("frames")
+                if (value != NULL && value[0] != 0) {
+                    switch (value[0]) {
+                        case 'y':
+                        case 't':
+                            _frame_samples = true;
+                            break;
+                        default:
+                            _frame_samples = false;
+                    }
+                }
+
             CASE("memory")
                 char* config = value ?
strchr(value, ':') : NULL;
                 if (config) {
@@ -213,6 +224,7 @@ Error Arguments::parse(const char* args) {
                     msg = "memory sampling interval must be >= 0";
                 }
 
+
             CASE("interval")
                 if (value == NULL || (_interval = parseUnits(value, UNIVERSAL)) <= 0) {
                     msg = "Invalid interval";
diff --git a/ddprof-lib/src/main/cpp/arguments.h b/ddprof-lib/src/main/cpp/arguments.h
index 92107718..a58765dc 100644
--- a/ddprof-lib/src/main/cpp/arguments.h
+++ b/ddprof-lib/src/main/cpp/arguments.h
@@ -129,6 +129,7 @@ class Arguments {
     Action _action;
     Ring _ring;
     const char* _event;
+    bool _frame_samples;  // true when the "frames" argument value starts with 'y' or 't'
     long _interval;
     long _cpu;
     long _wall;
@@ -176,7 +177,8 @@ class Arguments {
         _cstack(CSTACK_DEFAULT),
         _jfr_options(0),
         _context_attributes({}),
-        _lightweight(false) {
+        _lightweight(false),
+        _frame_samples(false) {  // NOTE(review): the member is declared right after _event but initialized last here; may trigger -Wreorder - confirm
     }
 
     ~Arguments();
diff --git a/ddprof-lib/src/main/cpp/ctimer.h b/ddprof-lib/src/main/cpp/ctimer.h
index e51ed68b..edd129f4 100644
--- a/ddprof-lib/src/main/cpp/ctimer.h
+++ b/ddprof-lib/src/main/cpp/ctimer.h
@@ -30,6 +30,7 @@ class CTimer : public Engine {
     static long _interval;
     static CStack _cstack;
     static int _signal;
+    static bool _frame_samples;  // copied from Arguments::_frame_samples in CTimer::start
 
     static int _max_timers;
     static int* _timers;
@@ -81,4 +82,4 @@ class CTimer : public Engine {
 
 #endif // __linux__
 
-#endif // _CTIMER_H
\ No newline at end of file
+#endif // _CTIMER_H
diff --git a/ddprof-lib/src/main/cpp/ctimer_linux.cpp b/ddprof-lib/src/main/cpp/ctimer_linux.cpp
index 5de2b9a7..fa23938c 100644
--- a/ddprof-lib/src/main/cpp/ctimer_linux.cpp
+++ b/ddprof-lib/src/main/cpp/ctimer_linux.cpp
@@ -24,6 +24,7 @@
 #include "debugSupport.h"
 #include "profiler.h"
 #include "vmStructs.h"
+#include "FrameSampler.h"
 
 
 #ifndef SIGEV_THREAD_ID
@@ -81,6 +82,7 @@ int* CTimer::_timers = NULL;
 CStack CTimer::_cstack;
 volatile bool CTimer::_enabled = false;
 int CTimer::_signal;
+bool CTimer::_frame_samples;  // read by CTimer::signalHandler to decide whether to call FrameSampler::sample
 
 int CTimer::registerThread(int tid) {
     if (tid >= _max_timers) {
@@ -153,6 +155,7 @@ Error CTimer::start(Arguments& args) {
     _interval =
args.cpuSamplerInterval();
     _cstack = args._cstack;
     _signal = SIGPROF;
+    _frame_samples = args._frame_samples;
 
     int max_timers = OS::getMaxThreadId();
     if (max_timers != _max_timers) {
@@ -200,6 +203,10 @@ void CTimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
     }
     Shims::instance().setSighandlerTid(tid);
 
+    if (_frame_samples) {
+        FrameSampler::sample(tid, _interval, ucontext);  // frame-level sample, taken before the regular execution sample
+    }
+
     ExecutionEvent event;
     VMThread* vm_thread = VMThread::current();
     if (vm_thread) {
@@ -211,4 +218,4 @@ void CTimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
     Shims::instance().setSighandlerTid(-1);
 }
 
-#endif // __linux__
\ No newline at end of file
+#endif // __linux__
diff --git a/ddprof-lib/src/main/cpp/event.h b/ddprof-lib/src/main/cpp/event.h
index 1bf33ba7..10874689 100644
--- a/ddprof-lib/src/main/cpp/event.h
+++ b/ddprof-lib/src/main/cpp/event.h
@@ -44,6 +44,18 @@ class ExecutionEvent : public Event {
     ExecutionEvent() : Event(), _thread_state(ThreadState::RUNNABLE), _weight(1), _execution_mode(ExecutionMode::UNKNOWN) {}
 };
 
+class FrameEvent : public Event {
+public:
+    jmethodID _methodID; // link to method
+    u64 _pcRelative; // to get the pc, link to the nmethod and add its offset
+    u8 _compilationTier; // value of NMethod::level() for the sampled method
+
+    FrameEvent(jmethodID methodID, u64 pcRelative, u8 compilationTier) : Event(),
+        _methodID(methodID),
+        _pcRelative(pcRelative),
+        _compilationTier(compilationTier) {}
+};
+
 class AllocEvent : public Event {
 public:
     u64 _size;
@@ -158,4 +170,11 @@ typedef struct QueueTimeEvent {
     u32 _origin;
 } QueueTimeEvent;
 
+typedef struct CodeEvent {
+    u64 _id;            // method id, matches FrameEvent::_methodID
+    u32 _name;          // id returned by the profiler's string label map
+    u32 _code_size;     // number of bytes readable at _code
+    const char* _code;  // start of the compiled code; not null terminated
+} CodeEvent;
+
 #endif // _EVENT_H
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp
index 4d5946f2..329a2275 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.cpp
+++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp
@@ -1214,6 +1214,32 @@ void Recording::recordMethodSample(Buffer* buf, int tid, u32 call_trace_id, Exec
     flushIfNeeded(buf);
 }
 
+void Recording::recordFrameSample(Buffer* buf, int tid, FrameEvent* event) {  // writes one datadog.FrameSample event
+    int start = buf->skip(1);
+    buf->putVar64(T_DATADOG_FRAME_SAMPLE);
+    buf->putVar64(TSC::ticks());
+    buf->putVar64(tid);
+    buf->putVar64((u64) event->_methodID);
+    buf->putVar64(event->_compilationTier);  // declared before pcRelative in the datadog.FrameSample metadata
+    buf->putVar64(event->_pcRelative);
+    writeContext(buf, Contexts::get(tid));
+    writeEventSizePrefix(buf, start);
+    flushIfNeeded(buf);
+}
+
+void Recording::recordCodeSample(Buffer *buf, CodeEvent* event) {  // writes one datadog.CodeSample event
+    int length = 1 + MAX_VAR64_LENGTH + MAX_VAR32_LENGTH + event->_code_size;
+    // ensure the code does not get truncated, because we won't be able to disassemble it if it is
+    flushIfNeeded(buf, RECORDING_BUFFER_LIMIT - length);  // NOTE(review): the limit goes negative when the code is larger than the buffer - confirm handling
+    int start = buf->skip(1);
+    buf->putVar64(T_DATADOG_CODE_SAMPLE);
+    buf->putVar64((u64) event->_id);
+    buf->putVar64(event->_name);
+    buf->putUtf8(event->_code, event->_code_size);
+    writeEventSizePrefix(buf, start);
+    flushIfNeeded(buf);
+}
+
 void Recording::recordWallClockEpoch(Buffer* buf, WallClockEpochEvent* event) {
     int start = buf->skip(1);
     buf->putVar64(T_WALLCLOCK_SAMPLE_EPOCH);
@@ -1451,6 +1477,13 @@ void FlightRecorder::recordHeapUsage(int lock_index, long value, bool live) {
     }
 }
 
+void FlightRecorder::recordCode(int lock_index, CodeEvent *code) {  // no-op when no recording is active
+    if (_rec != NULL) {
+        Buffer *buf = _rec->buffer(lock_index);
+        _rec->recordCodeSample(buf, code);
+    }
+}
+
 void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
                                  int event_type, Event* event, u64 counter) {
     if (_rec != NULL) {
@@ -1474,6 +1507,9 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u32 call_trace_id,
             case BCI_PARK:
                 _rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent*)event);
                 break;
+            case BCI_FRAME:
+                _rec->recordFrameSample(buf, tid, (FrameEvent*)event);
+                break;
         }
         _rec->flushIfNeeded(buf);
         _rec->addThread(tid);
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h
index 515680da..9c8d3f51 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.h
+++ b/ddprof-lib/src/main/cpp/flightRecorder.h
@@ -234,6 +234,8 @@ class Recording {
 
     void recordExecutionSample(Buffer* buf, int tid, u32 call_trace_id, ExecutionEvent* event);
     void recordMethodSample(Buffer* buf, int tid, u32 call_trace_id, ExecutionEvent* event);
+    void recordFrameSample(Buffer* buf, int tid, FrameEvent* event);  // writes a T_DATADOG_FRAME_SAMPLE event
+    void recordCodeSample(Buffer* buf, CodeEvent* event);  // writes a T_DATADOG_CODE_SAMPLE event
     void recordWallClockEpoch(Buffer* buf, WallClockEpochEvent* event);
     void recordTraceRoot(Buffer* buf, int tid, TraceRootEvent* event);
     void recordQueueTime(Buffer* buf, int tid, QueueTimeEvent* event);
@@ -301,6 +303,8 @@ class FlightRecorder {
     void recordDatadogSetting(int lock_index, int length, const char* name, const char* value, const char* unit);
 
     void recordHeapUsage(int lock_index, long value, bool live);
+
+    void recordCode(int lock_index, CodeEvent* code);  // forwards to Recording::recordCodeSample when a recording is active
 };
 
 #endif // _FLIGHTRECORDER_H
diff --git a/ddprof-lib/src/main/cpp/itimer.cpp b/ddprof-lib/src/main/cpp/itimer.cpp
index 79ce9a2b..ecfdfc90 100644
--- a/ddprof-lib/src/main/cpp/itimer.cpp
+++ b/ddprof-lib/src/main/cpp/itimer.cpp
@@ -16,6 +16,7 @@
 #include
 
 #include "debugSupport.h"
+#include "FrameSampler.h"
 #include "itimer.h"
 #include "os.h"
 #include "profiler.h"
@@ -23,9 +24,12 @@
 #include "thread.h"
 #include "vmStructs.h"
 
+#include  // NOTE(review): the header name is missing on this added line - restore it (or drop the line) before applying
+
 volatile bool ITimer::_enabled = false;
 long ITimer::_interval;
 CStack ITimer::_cstack;
+bool ITimer::_frame_samples;  // copied from Arguments::_frame_samples in ITimer::start
 
 void ITimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
     if (!_enabled) return;
@@ -39,6 +43,10 @@ void ITimer::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
     }
     Shims::instance().setSighandlerTid(tid);
 
+    if (_frame_samples) {
+        FrameSampler::sample(tid, _interval, ucontext);  // frame-level sample, taken before the regular execution sample
+    }
+
     ExecutionEvent event;
     VMThread* vm_thread = VMThread::current();
     if (vm_thread) {
@@ -67,6 +75,7 @@ Error ITimer::check(Arguments& args) {
 Error ITimer::start(Arguments& args) {
     _interval = args.cpuSamplerInterval();
     _cstack = args._cstack;
+    _frame_samples = args._frame_samples;
 
     OS::installSignalHandler(SIGPROF, signalHandler);
 
diff --git a/ddprof-lib/src/main/cpp/itimer.h b/ddprof-lib/src/main/cpp/itimer.h
index 0f3f3946..45a4d1e3 100644
--- a/ddprof-lib/src/main/cpp/itimer.h
+++ b/ddprof-lib/src/main/cpp/itimer.h
@@ -26,6 +26,7 @@ class ITimer : public Engine {
     static volatile bool _enabled;
     static long _interval;
     static CStack _cstack;
+    static bool _frame_samples;  // when true, the signal handler also calls FrameSampler::sample
 
     static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
 
diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.cpp b/ddprof-lib/src/main/cpp/jfrMetadata.cpp
index dcabb156..1206c8e8 100644
--- a/ddprof-lib/src/main/cpp/jfrMetadata.cpp
+++ b/ddprof-lib/src/main/cpp/jfrMetadata.cpp
@@ -128,6 +128,23 @@ void JfrMetadata::initialize(const std::vector& contextAttributes)
             << field("localRootSpanId", T_LONG, "Local Root Span ID")
             || contextAttributes)
 
+        << (type("datadog.FrameSample", T_DATADOG_FRAME_SAMPLE, "Intra-frame CPU sample")  // payload written by Recording::recordFrameSample
+            << category("Datadog", "Profiling")
+            << field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
+            << field("eventThread", T_THREAD, "Thread", F_CPOOL)
+            << field("methodId", T_LONG, "Method Id")
+            << field("compilationTier", T_INT, "Compilation Tier")  // fields must be declared in the order the writer emits them
+            << field("pcRelative", T_LONG, "Program counter relative to method start")
+            << field("spanId", T_LONG, "Span ID")
+            << field("localRootSpanId", T_LONG, "Local Root Span ID")
+            || contextAttributes)
+
+        << (type("datadog.CodeSample", T_DATADOG_CODE_SAMPLE, "Method Code Sample")  // payload written by Recording::recordCodeSample
+            << category("Datadog", "Profiling")
+            << field("methodId", T_LONG, "Method Id")
+            << field("name", T_STRING, "Method Name", F_CPOOL)
+            << field("code", T_STRING, "Compiled Code"))
+
         << (type("datadog.WallClockSamplingEpoch", T_WALLCLOCK_SAMPLE_EPOCH, "WallClock Sampling Epoch")
             << category("Datadog", "Profiling")
             << field("startTime", T_LONG, "Start Time", F_TIME_TICKS)
diff --git a/ddprof-lib/src/main/cpp/jfrMetadata.h b/ddprof-lib/src/main/cpp/jfrMetadata.h
index 25d4ce1e..04bdeb5f 100644
--- a/ddprof-lib/src/main/cpp/jfrMetadata.h
+++ b/ddprof-lib/src/main/cpp/jfrMetadata.h
@@ -78,6 +78,8 @@ enum JfrType {
     T_QUEUE_TIME = 123,
     T_DATADOG_CLASSREF_CACHE = 124,
     T_DATADOG_COUNTER = 125,
+    T_DATADOG_FRAME_SAMPLE = 126,  // type id of datadog.FrameSample
+    T_DATADOG_CODE_SAMPLE = 127,   // type id of datadog.CodeSample
     T_ANNOTATION = 200,
     T_LABEL = 201,
     T_CATEGORY = 202,
diff --git a/ddprof-lib/src/main/cpp/perfEvents.h b/ddprof-lib/src/main/cpp/perfEvents.h
index 0c782685..0f9c13d4 100644
--- a/ddprof-lib/src/main/cpp/perfEvents.h
+++ b/ddprof-lib/src/main/cpp/perfEvents.h
@@ -37,6 +37,7 @@ class PerfEvents : public Engine {
     static Ring _ring;
     static CStack _cstack;
     static bool _use_mmap_page;
+    static bool _frame_samples;  // copied from Arguments::_frame_samples in PerfEvents::start
 
     // cppcheck-suppress unusedPrivateFunction
     static u64 readCounter(siginfo_t* siginfo, void* ucontext);
diff --git a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp
index 1d466ed8..d00aad0a 100644
--- a/ddprof-lib/src/main/cpp/perfEvents_linux.cpp
+++ b/ddprof-lib/src/main/cpp/perfEvents_linux.cpp
@@ -548,6 +548,7 @@ long PerfEvents::_interval;
 Ring PerfEvents::_ring;
 CStack PerfEvents::_cstack;
 bool PerfEvents::_use_mmap_page;
+bool PerfEvents::_frame_samples;
 
 static int __intsort(const void *a, const void *b) {
     return *(const int*)a > *(const int*)b;
@@ -711,6 +712,10 @@ void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
     if (_enabled) {
         Shims::instance().setSighandlerTid(tid);
 
+        if (_frame_samples) {
+            FrameSampler::sample(tid, _interval, ucontext);  // NOTE(review): no hunk of this patch adds #include "FrameSampler.h" to this file - confirm the declaration is visible here
+        }
+
         u64 counter = readCounter(siginfo, ucontext);
         ExecutionEvent event;
         VMThread* vm_thread = VMThread::current();
@@ -819,6 +824,7 @@ Error PerfEvents::start(Arguments& args) {
     }
 
     _interval = interval ?
interval : _event_type->default_interval;
+    _frame_samples = args._frame_samples;
 
     _ring = args._ring;
     if ((_ring & RING_KERNEL) && !Symbols::haveKernelSymbols()) {
diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp
index 6493c12c..40551d69 100644
--- a/ddprof-lib/src/main/cpp/profiler.cpp
+++ b/ddprof-lib/src/main/cpp/profiler.cpp
@@ -44,6 +44,7 @@
 #include "vmStructs.h"
 #include "context.h"
 #include "counters.h"
+#include "FrameSampler.h"
 
 
 // The instance is not deleted on purpose, since profiler structures
@@ -646,6 +647,27 @@ void Profiler::recordExternalSample(u64 counter, int tid, jvmtiFrameInfo *jvmti_
     _locks[lock_index].unlock();
 }
 
+
+void Profiler::recordFrameSample(u64 counter, int tid, FrameEvent* event) {  // the sample is dropped when all three candidate locks are busy
+    u32 lock_index = getLockIndex(tid);
+    if (_locks[lock_index].tryLock() ||  // short-circuit: stops at the first lock acquired, lock_index names it
+        _locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() ||
+        _locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
+        _jfr.recordEvent(lock_index, tid, 0, BCI_FRAME, event, counter);
+        _locks[lock_index].unlock();  // release only the lock that was actually acquired
+    }
+}
+
+void Profiler::recordCode(int tid, CodeEvent* event) {  // the code sample is dropped when all three candidate locks are busy
+    u32 lock_index = getLockIndex(tid);
+    if (_locks[lock_index].tryLock() ||  // short-circuit: stops at the first lock acquired, lock_index names it
+        _locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() ||
+        _locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
+        _jfr.recordCode(lock_index, event);
+        _locks[lock_index].unlock();  // release only the lock that was actually acquired
+    }
+}
+
 void Profiler::recordSample(void* ucontext, u64 counter, int tid, jint event_type, Event* event) {
     atomicInc(_total_samples);
 
@@ -1179,6 +1201,7 @@ Error Profiler::stop() {
     // Acquire all spinlocks to avoid race with remaining signals
     lockAll();
     _jfr.stop();
+    FrameSampler::clear();
     unlockAll();
 
     _state = IDLE;
@@ -1229,6 +1252,7 @@ Error Profiler::flushJfr() {
 
     lockAll();
     _jfr.flush();
+    FrameSampler::clear();
     unlockAll();
 
     return Error::OK;
@@ -1259,6 +1283,9 @@ Error Profiler::dump(const char* path, const int length) {
         if (!_omit_stacktraces) {
             _call_trace_storage.clear();
         }
+
+        FrameSampler::clear();
+
         unlockAll();
         // Reset classmap
         _class_map_lock.lock();
diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h
index acaea04d..9b9356ab 100644
--- a/ddprof-lib/src/main/cpp/profiler.h
+++ b/ddprof-lib/src/main/cpp/profiler.h
@@ -267,6 +267,8 @@ class Profiler {
     void switchThreadEvents(jvmtiEventMode mode);
     int convertNativeTrace(int native_frames, const void** callchain, ASGCT_CallFrame* frames);
     void recordSample(void* ucontext, u64 counter, int tid, jint event_type, Event* event);
+    void recordFrameSample(u64 counter, int tid, FrameEvent* event);  // records a BCI_FRAME event without a call trace
+    void recordCode(int tid, CodeEvent* event);  // records the compiled code of a sampled method
     void recordExternalSample(u64 counter, int tid, jvmtiFrameInfo *jvmti_frames, jint num_jvmti_frames, bool truncated, jint event_type, Event* event);
     void recordExternalSample(u64 counter, int tid, int num_frames, ASGCT_CallFrame* frames, bool truncated, jint event_type, Event* event);
     void recordWallClockEpoch(int tid, WallClockEpochEvent* event);
diff --git a/ddprof-lib/src/main/cpp/vmEntry.h b/ddprof-lib/src/main/cpp/vmEntry.h
index e9a0a2de..128f77b7 100644
--- a/ddprof-lib/src/main/cpp/vmEntry.h
+++ b/ddprof-lib/src/main/cpp/vmEntry.h
@@ -32,6 +32,7 @@
 
 // Denotes ASGCT_CallFrame where method_id has special meaning (not jmethodID)
 enum ASGCT_CallFrameType {
+    BCI_FRAME               = 1,    // frame sample; NOTE(review): positive, unlike the neighbouring values - confirm it cannot be mistaken for a real bci
     BCI_CPU                 = 0,    // cpu time
     BCI_WALL                = -10,  // wall time
     BCI_NATIVE_FRAME        = -11,  // native function name (char*)
diff --git a/ddprof-lib/src/main/cpp/vmStructs.cpp b/ddprof-lib/src/main/cpp/vmStructs.cpp
index d2f2889c..2a808b77 100644
--- a/ddprof-lib/src/main/cpp/vmStructs.cpp
+++ b/ddprof-lib/src/main/cpp/vmStructs.cpp
@@ -60,6 +60,7 @@ int VMStructs::_anchor_fp_offset = -1;
 int VMStructs::_frame_size_offset = -1;
 int VMStructs::_frame_complete_offset = -1;
 int VMStructs::_code_begin_offset = -1;
+int VMStructs::_code_end_offset = -1;
 int VMStructs::_scopes_begin_offset = -1;
 int VMStructs::_nmethod_name_offset = -1;
 int VMStructs::_nmethod_method_offset = -1;
@@ -273,7 +274,10 @@ void VMStructs::initOffsets() {
                     _code_begin_offset = - *(int*)(entry + offset_offset);
                 } else if (strcmp(field, "_name") == 0) {
                     _nmethod_name_offset = *(int*)(entry + offset_offset);
+                } else if (strcmp(field, "_code_end") == 0) {
+                    _code_end_offset = *(int*)(entry + offset_offset);  // kept non-negative so NMethod::codeSize() reads it as a pointer field; negation is reserved for int-offset aliases
                 }
+                // TODO find older aliases for _code_end (what is the counterpart for _code_offset?)
             } else if (strcmp(type, "CodeCache") == 0) {
                 if (strcmp(field, "_heap") == 0) {
                     _code_heap_addr = *(char***)(entry + address_offset);
diff --git a/ddprof-lib/src/main/cpp/vmStructs.h b/ddprof-lib/src/main/cpp/vmStructs.h
index 9ac77218..256ac38e 100644
--- a/ddprof-lib/src/main/cpp/vmStructs.h
+++ b/ddprof-lib/src/main/cpp/vmStructs.h
@@ -67,6 +67,7 @@ class VMStructs {
     static int _frame_size_offset;
     static int _frame_complete_offset;
     static int _code_begin_offset;
+    static int _code_end_offset;
     static int _scopes_begin_offset;
     static int _nmethod_name_offset;
     static int _nmethod_method_offset;
@@ -360,6 +361,20 @@ class NMethod : VMStructs {
         }
     }
 
+    const int codeSize() {  // size in bytes of the compiled code, or -1 when it cannot be determined
+        if (_code_begin_offset >= 0 && _code_end_offset >= 0) {  // both offsets locate pointer fields
+            const char* begin = *(const char**) at(_code_begin_offset);
+            const char* end = *(const char**) at(_code_end_offset);
+            return (int)(end - begin);
+        } else if (_code_begin_offset < -1 && _code_end_offset < -1) {
+            // both are negated offsets of int fields; never mix this with the pointer encoding above
+            const char* begin = at(*(int*) at(-_code_begin_offset));
+            const char* end = at(*(int*) at(-_code_end_offset));
+            return (int)(end - begin);
+        }
+        return -1;
+    }
+
     const char* scopes() {
         if (_scopes_begin_offset >= 0) {
             return *(const char**) at(_scopes_begin_offset);
diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/FrameLevelCpuTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/FrameLevelCpuTest.java
new file mode 100644
index 00000000..a6cfed67
--- /dev/null
+++
b/ddprof-test/src/test/java/com/datadoghq/profiler/cpu/FrameLevelCpuTest.java
@@ -0,0 +1,81 @@
+package com.datadoghq.profiler.cpu;
+
+import com.datadoghq.profiler.AbstractProfilerTest;
+import com.datadoghq.profiler.Platform;
+import org.junit.jupiter.api.Assumptions;
+import org.junitpioneer.jupiter.RetryingTest;
+import org.openjdk.jmc.common.item.*;
+import org.openjdk.jmc.common.unit.ContentType;
+import org.openjdk.jmc.common.unit.IQuantity;
+import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes;
+
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+
+import static com.datadoghq.profiler.MoreAssertions.assertInRange;
+import static org.junit.jupiter.api.Assertions.*;
+import static org.openjdk.jmc.common.item.Attribute.attr;
+import static org.openjdk.jmc.common.unit.UnitLookup.NUMBER;
+
+/**
+ * Checks that every datadog.FrameSample event has a matching datadog.CodeSample and sane field values.
+ */
+public class FrameLevelCpuTest extends AbstractProfilerTest {
+
+    private static final IAttribute<IQuantity> METHOD_ID = attr("methodId", "", "", NUMBER);
+    private static final IAttribute<IQuantity> COMPILATION_TIER = attr("compilationTier", "", "", NUMBER);
+    private static final IAttribute<IQuantity> PC_RELATIVE = attr("pcRelative", "", "", NUMBER);
+
+    private ProfiledCode profiledCode;
+
+    @Override
+    protected void before() {
+        profiledCode = new ProfiledCode(profiler);
+    }
+
+    @RetryingTest(10)
+    public void test() throws ExecutionException, InterruptedException {
+        Assumptions.assumeTrue(!Platform.isJ9());
+        for (int i = 0, id = 1; i < 100; i++, id += 3) {
+            profiledCode.method1(id);
+        }
+        stopProfiler();
+        IItemCollection frameSamples = verifyEvents("datadog.FrameSample");
+        IItemCollection codeSamples = verifyEvents("datadog.CodeSample");
+
+        Map<Long, List<IItem>> codeSamplesByMethodId = new HashMap<>();
+        for (IItemIterable it : codeSamples) {
+            IMemberAccessor<IQuantity, IItem> methodIdAccessor = METHOD_ID.getAccessor(it.getType());
+            for (IItem codeSample : it) {
+                long methodId = methodIdAccessor.getMember(codeSample).longValue();
+                codeSamplesByMethodId.computeIfAbsent(methodId, mi -> new ArrayList<>())
+                        .add(codeSample);
+            }
+        }
+        for (IItemIterable it : frameSamples) {
+            IMemberAccessor<IQuantity, IItem> methodIdAccessor = METHOD_ID.getAccessor(it.getType());
+            IMemberAccessor<IQuantity, IItem> tierAccessor = COMPILATION_TIER.getAccessor(it.getType());
+            IMemberAccessor<IQuantity, IItem> pcRelativeAccessor = PC_RELATIVE.getAccessor(it.getType());
+            for (IItem frameSample : it) {
+                long methodId = methodIdAccessor.getMember(frameSample).longValue();
+                // check we don't have frames without corresponding code samples
+                assertTrue(codeSamplesByMethodId.containsKey(methodId), "no code sample for method " + methodId);
+                int tier = (int) tierAccessor.getMember(frameSample).longValue();
+                assertTrue(tier > 0, "unexpected compilation tier " + tier);
+                long pcRelative = pcRelativeAccessor.getMember(frameSample).longValue();
+                assertTrue(pcRelative >= 0, "negative pcRelative " + pcRelative);
+                // TODO assert it belongs to the code blob too
+            }
+        }
+    }
+
+    @Override
+    protected void after() throws Exception {
+        profiledCode.close();
+    }
+
+    @Override
+    protected String getProfilerCommand() {
+        return "cpu=10ms,frames=y";
+    }
+}