From 438ff2384d1b6868d132a83293b07ca950f5044e Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 1 Sep 2023 12:26:15 -0500 Subject: [PATCH] Squashed sampling commits. --- Makefile.am | 2 +- configure.ac | 2 + cuda/Makefile.am | 8 +- cuda/tracer_cuda.sh.in | 8 + cuda/tracer_cuda_helpers.include.c | 4 + cuda/tracer_cudart_helpers.include.c | 4 + hip/Makefile.am | 4 +- omp/Makefile.am | 4 +- omp/tracer_omp.sh.in | 8 + omp/tracer_ompt_helpers.include.c.erb | 4 + opencl/Makefile.am | 4 +- opencl/tracer_opencl.sh.in | 8 + opencl/tracer_opencl_helpers.include.c | 4 + sampling/Makefile.am | 59 ++ sampling/gen_sampling_custom_probes.rb | 32 + sampling/sampling_events.yaml | 11 + sampling/thapi_sampling.c | 143 +++ sampling/thapi_sampling.h | 5 + utils/Makefile.am | 4 +- utils/babeltrace_energy.in | 138 +++ utils/babeltrace_thapi.in | 5 +- utils/xprof_utils.cpp | 98 +++ utils/xprof_utils.hpp | 9 + xprof/btx_interval_model.yaml | 38 + xprof/btx_timeline.cpp | 118 +++ xprof/interval.c.erb | 2 + xprof/interval.h.erb | 3 +- xprof/interval_model.yaml | 26 +- xprof/perfetto_prunned.proto | 1106 +++++++++++++++++++++++- xprof/xprof.sh.erb.in | 12 + ze/Makefile.am | 5 +- ze/gen_ze.rb | 1 + ze/gen_ze_custom_probes.rb | 2 +- ze/tracer_ze.sh.in | 9 + ze/tracer_ze_helpers.include.c | 258 ++++++ ze/ze_events.yaml | 24 + ze/zeinterval_callbacks.cpp.erb | 55 +- ze/zeinterval_callbacks.hpp | 3 + 38 files changed, 2190 insertions(+), 40 deletions(-) create mode 100644 sampling/Makefile.am create mode 100644 sampling/gen_sampling_custom_probes.rb create mode 100644 sampling/sampling_events.yaml create mode 100644 sampling/thapi_sampling.c create mode 100644 sampling/thapi_sampling.h create mode 100755 utils/babeltrace_energy.in diff --git a/Makefile.am b/Makefile.am index 577082fb..d7ad6a40 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = utils opencl ze xprof cuda omp hip +SUBDIRS = utils sampling opencl ze xprof cuda omp hip EXTRA_DIST = 
autogen.sh README.md diff --git a/configure.ac b/configure.ac index 2a6c46ff..8977fa0c 100644 --- a/configure.ac +++ b/configure.ac @@ -98,6 +98,7 @@ AC_CHECK_FUNCS([atexit clock_gettime ftruncate memmove memset strdup strstr strt AC_CONFIG_FILES([Makefile utils/Makefile + sampling/Makefile xprof/xprof.sh.erb opencl/Makefile ze/Makefile @@ -113,6 +114,7 @@ AC_CONFIG_FILES([ze/test_wrapper.sh], [chmod +x ze/test_wrapper.sh]) AC_CONFIG_FILES([cuda/tracer_cuda.sh], [chmod +x cuda/tracer_cuda.sh]) AC_CONFIG_FILES([cuda/test_wrapper.sh], [chmod +x cuda/test_wrapper.sh]) AC_CONFIG_FILES([xprof/test_wrapper.sh], [chmod +x xprof/test_wrapper.sh]) +AC_CONFIG_FILES([utils/babeltrace_energy], [chmod +x utils/babeltrace_energy]) AC_CONFIG_FILES([utils/babeltrace_thapi], [chmod +x utils/babeltrace_thapi]) AC_CONFIG_FILES([omp/tracer_omp.sh], [chmod +x omp/tracer_omp.sh]) AC_CONFIG_FILES([hip/tracer_hip.sh], [chmod +x hip/tracer_hip.sh]) diff --git a/cuda/Makefile.am b/cuda/Makefile.am index dbf5ee0a..3bf8a07a 100644 --- a/cuda/Makefile.am +++ b/cuda/Makefile.am @@ -269,9 +269,9 @@ nodist_libTracerCUDA_la_SOURCES = \ $(CUDA_PROBES_INCL) \ tracer_cuda.c -libTracerCUDA_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(srcdir)/include -I../utils -I./ +libTracerCUDA_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I../utils -I./ libTracerCUDA_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) -libTracerCUDA_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) +libTracerCUDA_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libTracerCUDA_la_LDFLAGS += -version-info 1:0:0 libTracerCUDA_la_LIBADD = libcudatracepoints.la @@ -279,9 +279,9 @@ nodist_libTracerCUDART_la_SOURCES = \ $(CUDART_PROBES_INCL) \ tracer_cudart.c -libTracerCUDART_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(srcdir)/include -I../utils -I./ 
+libTracerCUDART_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I../utils -I./ libTracerCUDART_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) -libTracerCUDART_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) +libTracerCUDART_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libTracerCUDART_la_LDFLAGS += -version-number 12:1:55 -Wl,--version-script,tracer_cudart.map libTracerCUDART_la_LIBADD = libcudarttracepoints.la diff --git a/cuda/tracer_cuda.sh.in b/cuda/tracer_cuda.sh.in index 4d3cd708..c0390577 100644 --- a/cuda/tracer_cuda.sh.in +++ b/cuda/tracer_cuda.sh.in @@ -43,6 +43,7 @@ display_help() { echo " -e, --exports Trace export functions" echo " -v, --visualize Visualize trace on thefly" echo " --properties Dump devices infos" + echo " --sample Sample performance counters" exit 1 } @@ -61,6 +62,7 @@ while true; do -e | --exports ) shift; exports=1;; -v | --visualize ) shift; lttng_view=1;; --properties ) shift; properties=1;; + --sample ) shift; sample=1;; -- ) shift; break ;; * ) break ;; esac @@ -101,6 +103,12 @@ if [ ! -z "$properties" ] then lttng enable-event --channel=blocking-channel --userspace lttng_ust_cuda_properties:* fi +if [ ! 
-z "$sample" ] +then + export LTTNG_UST_SAMPLING=1 + lttng enable-channel --userspace nonblocking-channel + lttng enable-event --channel=nonblocking-channel --userspace lttng_ust_sampling:* +fi if [ -z "$LTTNG_UST_CUDA_LIBCUDA" ] then LTTNG_UST_CUDA_LIBCUDA=$(whichlib64_head libcuda.so) diff --git a/cuda/tracer_cuda_helpers.include.c b/cuda/tracer_cuda_helpers.include.c index 1e4bf9db..167a3fb4 100644 --- a/cuda/tracer_cuda_helpers.include.c +++ b/cuda/tracer_cuda_helpers.include.c @@ -1,3 +1,5 @@ +#include "thapi_sampling.h" + //pthread_mutex_t cuda_closures_mutex = PTHREAD_MUTEX_INITIALIZER; // //struct cuda_closure { @@ -485,6 +487,8 @@ static void _load_tracer(void) { void *handle = NULL; int verbose = 0; + thapi_sampling_init(); + s = getenv("LTTNG_UST_CUDA_LIBCUDA"); if (s) handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); diff --git a/cuda/tracer_cudart_helpers.include.c b/cuda/tracer_cudart_helpers.include.c index 181c771c..847510da 100644 --- a/cuda/tracer_cudart_helpers.include.c +++ b/cuda/tracer_cudart_helpers.include.c @@ -1,3 +1,5 @@ +#include "thapi_sampling.h" + static pthread_once_t _init = PTHREAD_ONCE_INIT; static __thread volatile int in_init = 0; static volatile int _initialized = 0; @@ -7,6 +9,8 @@ static void _load_tracer(void) { void *handle = NULL; int verbose = 0; + thapi_sampling_init(); + s = getenv("LTTNG_UST_CUDART_LIBCUDART"); if (s) handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL); diff --git a/hip/Makefile.am b/hip/Makefile.am index 8713ea10..5df50ddb 100644 --- a/hip/Makefile.am +++ b/hip/Makefile.am @@ -277,9 +277,9 @@ nodist_libTracerHIP_la_SOURCES = \ $(HIP_PROBES_INCL) \ tracer_hip.c -libTracerHIP_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(srcdir)/include -I./utils -I./ +libTracerHIP_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I./utils -I./ libTracerHIP_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) 
-libTracerHIP_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) +libTracerHIP_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libTracerHIP_la_LDFLAGS += -Wl,--version-script,$(srcdir)/hip.map -version-number 5:4:50400 libTracerHIP_la_DEPENDS = $(srcdir)/hip.map libTracerHIP_la_LIBADD = libhiptracepoints.la diff --git a/omp/Makefile.am b/omp/Makefile.am index 609fd0d1..f527efcd 100644 --- a/omp/Makefile.am +++ b/omp/Makefile.am @@ -129,9 +129,9 @@ nodist_libTracerOMPT_la_SOURCES = \ $(OMP_PROBES_INCL) \ tracer_ompt.c -libTracerOMPT_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I./modified_include -I../utils -I./ +libTracerOMPT_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I./modified_include -I../utils -I./ libTracerOMPT_la_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS) -libTracerOMPT_la_LDFLAGS = $(LTTNG_UST_LIBS) -avoid-version -module +libTracerOMPT_la_LDFLAGS = $(LTTNG_UST_LIBS) -avoid-version -module ../sampling/libThapiSampling.la libTracerOMPT_la_LIBADD = libompttracepoints.la install-exec-hook: diff --git a/omp/tracer_omp.sh.in b/omp/tracer_omp.sh.in index 58c0e01d..7260d45f 100644 --- a/omp/tracer_omp.sh.in +++ b/omp/tracer_omp.sh.in @@ -12,6 +12,7 @@ display_help() { echo " --help Show this screen" echo " --version Print the version string" echo " --disable-intel-extensions Disable Intel extensions" + echo " --sample Sample performance counters" exit 1 } @@ -26,6 +27,7 @@ while true; do --help ) display_help; exit;; --version ) display_version; exit;; --disable-intel-extensions) intel_extensions=false shift;; + --sample ) shift; sample=1;; -- ) shift; break ;; * ) break ;; esac @@ -53,6 +55,12 @@ export LTTNG_UST_ALLOW_BLOCKING=1 if [ "$intel_extensions" = true ] ; then export LTTNG_UST_OMP_INTEL=1 fi +if [ ! 
-z "$sample" ] +then + export LTTNG_UST_SAMPLING=1 + lttng enable-channel --userspace nonblocking-channel + lttng enable-event --channel=nonblocking-channel --userspace lttng_ust_sampling:* +fi lttng start diff --git a/omp/tracer_ompt_helpers.include.c.erb b/omp/tracer_ompt_helpers.include.c.erb index df6e02b2..b1a03d9d 100644 --- a/omp/tracer_ompt_helpers.include.c.erb +++ b/omp/tracer_ompt_helpers.include.c.erb @@ -1,3 +1,5 @@ +#include "thapi_sampling.h" + <% require "yaml" %> #define _OMPT_SET_CALLBACK(value, name) \ @@ -33,6 +35,8 @@ static int _ompt_initialize(ompt_function_lookup_t lookup, int do_callbacks_intel = 0; int verbose = 0; + thapi_sampling_init(); + if (getenv("LTTNG_UST_OMP_INTEL")) do_callbacks_intel = 1; if (getenv("LTTNG_UST_OMP_VERBOSE")) diff --git a/opencl/Makefile.am b/opencl/Makefile.am index c545c170..ae235e8a 100644 --- a/opencl/Makefile.am +++ b/opencl/Makefile.am @@ -157,9 +157,9 @@ nodist_libTracerOpenCL_la_SOURCES = \ tracer_opencl.h \ tracer_opencl.c -libTracerOpenCL_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(srcdir)/include -I../utils -I./ +libTracerOpenCL_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I../utils -I./ libTracerOpenCL_la_CFLAGS = -Wall -Wextra -Wno-unused-parameter $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) -libTracerOpenCL_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) +libTracerOpenCL_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libTracerOpenCL_la_LDFLAGS += -Wl,--version-script,$(srcdir)/tracer_opencl.map -version-info 1:0:0 libTracerOpenCL_la_DEPENDS = $(srcdir)/tracer_opencl.map libTracerOpenCL_la_LIBADD = libtracepoints.la diff --git a/opencl/tracer_opencl.sh.in b/opencl/tracer_opencl.sh.in index eceb34a1..67a866e5 100644 --- a/opencl/tracer_opencl.sh.in +++ b/opencl/tracer_opencl.sh.in @@ -49,6 +49,7 @@ display_help() { echo " -e, --iteration-end VALUE 
Dump inputs and outputs for kernels until enqueue counter VALUE" echo " -v, --visualize Visualize trace on thefly" echo " --devices Dump devices information" + echo " --sample Sample performance counters" exit 1 } @@ -73,6 +74,7 @@ while true; do -e | --iteration-end ) shift; export LTTNG_UST_OPENCL_DUMP=1; export LTTNG_UST_OPENCL_DUMP_END=$1; shift ;; -v | --visualize) shift; lttng_view=1;; --devices ) shift; devices=1;; + --sample ) shift; sample=1;; -- ) shift; break ;; * ) break ;; esac @@ -117,6 +119,12 @@ if [ ! -z "$build" ] then lttng enable-event --channel=blocking-channel --userspace lttng_ust_opencl_build:* fi +if [ ! -z "$sample" ] +then + export LTTNG_UST_SAMPLING=1 + lttng enable-channel --userspace nonblocking-channel + lttng enable-event --channel=nonblocking-channel --userspace lttng_ust_sampling:* +fi if [ ! -z "$LTTNG_UST_OPENCL_DUMP" ] then lttng enable-event --channel=blocking-channel --userspace lttng_ust_opencl_dump:* diff --git a/opencl/tracer_opencl_helpers.include.c b/opencl/tracer_opencl_helpers.include.c index ad567538..ee9c715d 100644 --- a/opencl/tracer_opencl_helpers.include.c +++ b/opencl/tracer_opencl_helpers.include.c @@ -1,3 +1,5 @@ +#include "thapi_sampling.h" + void CL_CALLBACK event_notify (cl_event event, cl_int event_command_exec_status, void *user_data) { (void)user_data; if (tracepoint_enabled(lttng_ust_opencl_profiling, event_profiling_results)) { @@ -1180,6 +1182,8 @@ static void _load_tracer(void) { void * handle = NULL; int verbose = 0; + thapi_sampling_init(); + s = getenv("LTTNG_UST_OPENCL_LIBOPENCL"); if (s) handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); diff --git a/sampling/Makefile.am b/sampling/Makefile.am new file mode 100644 index 00000000..00efa4d6 --- /dev/null +++ b/sampling/Makefile.am @@ -0,0 +1,59 @@ +if STRICT + WERROR = -Werror +else + WERROR = +endif +LTTNG_FLAGS = -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Wno-sign-compare $(WERROR) -I$(top_srcdir)/utils 
-I$(top_srcdir)/utils/include -I$(srcdir)/include -I../utils -I./ + + +TRACEPOINT_GEN = \ + $(srcdir)/sampling_events.yaml + +SAMPLING_STATIC_PROBES = \ + sampling + +SAMPLING_STATIC_PROBES_TP = $(SAMPLING_STATIC_PROBES:=.tp) + +SAMPLING_STATIC_PROBES_INCL = $(SAMPLING_STATIC_PROBES:=.h) + +SAMPLING_STATIC_PROBES_SRC = $(SAMPLING_STATIC_PROBES:=.c) + +$(SAMPLING_STATIC_PROBES_TP): %.tp: $(srcdir)/gen_sampling_custom_probes.rb $(TRACEPOINT_GEN) + SRC_DIR=$(srcdir) $(RUBY) $< lttng_ust_$* > $@ + +%.h %.c: %.tp + $(LTTNG_GEN_TP) $< -o $*.c -o $*.h + +noinst_LTLIBRARIES = libtracepoints.la + +nodist_libtracepoints_la_SOURCES = \ + $(SAMPLING_STATIC_PROBES_INCL) \ + $(SAMPLING_STATIC_PROBES_SRC) + +libtracepoints_la_CFLAGS = $(LTTNG_FLAGS) $(LTTNG_UST_CFLAGS) +libtracepoints_la_LDFLAGS = $(LTTNG_UST_LIBS) + +EXTRA_DIST = \ + sampling_events.yaml \ + gen_sampling_custom_probes.rb + +CLEANFILES = \ + $(SAMPLING_STATIC_PROBES_INCL) \ + $(SAMPLING_STATIC_PROBES_TP) \ + $(SAMPLING_STATIC_PROBES_SRC) + +BUILT_SOURCES = \ + $(SAMPLING_STATIC_PROBES_INCL) + +nodist_libThapiSampling_la_SOURCES = \ + $(SAMPLING_STATIC_PROBES_INCL) + +libThapiSampling_la_SOURCES = \ + thapi_sampling.h \ + thapi_sampling.c + +libThapiSampling_la_CFLAGS = -Wall -Wextra -Wno-unused-parameter $(WERROR) -I$(top_srcdir)/utils/include +libThapiSampling_la_LDFLAGS = -lpthread -version-info 1:0:0 +libThapiSampling_la_LIBADD = libtracepoints.la + +lib_LTLIBRARIES = libThapiSampling.la diff --git a/sampling/gen_sampling_custom_probes.rb b/sampling/gen_sampling_custom_probes.rb new file mode 100644 index 00000000..c5fc181e --- /dev/null +++ b/sampling/gen_sampling_custom_probes.rb @@ -0,0 +1,32 @@ +require 'yaml' +require_relative '../utils/LTTng' + +if ENV["SRC_DIR"] + SRC_DIR = ENV["SRC_DIR"] +else + SRC_DIR = "." +end + +namespace = ARGV[0] + +raise "No namespace provided!" unless namespace + +h = YAML::load_file(File.join(SRC_DIR,"sampling_events.yaml"))[namespace] + +raise "Invalid namespace!" 
unless h + +puts < +#include +#include +#include +#include +#include +#include "thapi_sampling.h" +#include "sampling.h" +#include "utarray.h" + +struct sampling_entry { + void (*pfn)(void); + struct timespec interval; + struct timespec next; +}; + + +static pthread_mutex_t thapi_sampling_mutex = PTHREAD_MUTEX_INITIALIZER; +static UT_array *thapi_sampling_events = NULL; + +static pthread_once_t thapi_init_once = PTHREAD_ONCE_INIT; +static volatile int thapi_sampling_finished = 0; +static pthread_t thapi_sampling_thread; + +static void thapi_sampling_cleanup() { + thapi_sampling_finished = 1; + pthread_join(thapi_sampling_thread, NULL); + pthread_mutex_lock(&thapi_sampling_mutex); + struct sampling_entry **entry = NULL; + while ((entry = (struct sampling_entry **)utarray_next(thapi_sampling_events, entry))) + free(*entry); + utarray_free(thapi_sampling_events); + pthread_mutex_unlock(&thapi_sampling_mutex); +} + +static inline int time_cmp(const struct timespec * t1, const struct timespec * t2) { + if (t1->tv_sec < t2->tv_sec) + return -1; + if (t1->tv_sec > t2->tv_sec) + return 1; + if (t1->tv_nsec < t2->tv_nsec) + return -1; + if (t1->tv_nsec > t2->tv_nsec) + return 1; + return 0; +} + +static inline int sampling_entry_cmp(const struct sampling_entry **e1, const struct sampling_entry **e2) { + return time_cmp(&(*e1)->next, &(*e2)->next); +} + +static inline int sampling_entry_cmpw(const void * t1, const void * t2) { + return sampling_entry_cmp((const struct sampling_entry **)t1, (const struct sampling_entry **)t2); +} + +static inline void time_add(struct timespec *dest, const struct timespec *t, const struct timespec *d) { + dest->tv_nsec = t->tv_nsec + d->tv_nsec; + dest->tv_sec = t->tv_sec + d->tv_sec; + while (dest->tv_nsec > 999999999) { + dest->tv_sec += 1; + dest->tv_nsec -= 1000000000; + } +} + +void * thapi_sampling_loop(void *args) { + (void)args; + while(!thapi_sampling_finished) { + struct timespec now; + struct sampling_entry **entry = NULL; + + 
pthread_mutex_lock(&thapi_sampling_mutex); + clock_gettime(CLOCK_REALTIME, &now); + /* The array is kept sorted by deadline: fire every sampler that is due and reschedule it. */ + while ((entry = (struct sampling_entry **)utarray_next(thapi_sampling_events, entry)) && + time_cmp(&(*entry)->next, &now) < 0) { + (*entry)->pfn(); + time_add(&(*entry)->next, &(*entry)->next, &(*entry)->interval); + /* Still in the past after one interval: restart the schedule from now. */ + if(time_cmp(&(*entry)->next, &now) < 0) + time_add(&(*entry)->next, &now, &(*entry)->interval); + } + utarray_sort(thapi_sampling_events, sampling_entry_cmpw); + entry = (struct sampling_entry **)utarray_front(thapi_sampling_events); + /* Copy the earliest deadline while the lock is held: entry points into the array storage, which a concurrent thapi_register_sampling() may reallocate or re-sort once the mutex is released. */ + struct timespec wakeup = { 0, 0 }; + int has_wakeup = (entry != NULL); + if (has_wakeup) + wakeup = (*entry)->next; + pthread_mutex_unlock(&thapi_sampling_mutex); + if (has_wakeup) { + /* Absolute sleep, retried when interrupted unless shutdown was requested. */ + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &wakeup, NULL) && !thapi_sampling_finished) + ; + } else { + /* No sampler registered yet: idle for 100 ms instead of spinning on the mutex. */ + struct timespec idle = { 0, 100000000 }; + clock_nanosleep(CLOCK_REALTIME, 0, &idle, NULL); + } + } + return NULL; +} + +static void thapi_sampling_heartbeat() { + do_tracepoint(lttng_ust_sampling, heartbeat, 16); +} + +static void thapi_sampling_heartbeat2() { + do_tracepoint(lttng_ust_sampling, heartbeat2); +} + +void thapi_sampling_init_once() { + struct timespec interval; + utarray_new(thapi_sampling_events, &ut_ptr_icd); + if (!thapi_sampling_events) + return; + if (getenv("LTTNG_UST_SAMPLING_HEARTBEAT")) { + interval.tv_sec = 0; + interval.tv_nsec = 100000000; + thapi_register_sampling(&thapi_sampling_heartbeat, &interval); + } + if (getenv("LTTNG_UST_SAMPLING_HEARTBEAT2")) { + interval.tv_sec = 0; + interval.tv_nsec = 30000000; + thapi_register_sampling(&thapi_sampling_heartbeat2, &interval); + } + if (!pthread_create(&thapi_sampling_thread, NULL, &thapi_sampling_loop, NULL)) + atexit(&thapi_sampling_cleanup); +} + +int thapi_sampling_init() { + if (getenv("LTTNG_UST_SAMPLING")) + pthread_once(&thapi_init_once, &thapi_sampling_init_once); + return 1; +} + +void thapi_register_sampling(void (*pfn)(void), struct timespec *interval) { + struct sampling_entry *entry = NULL; + struct timespec now, next; + if(clock_gettime(CLOCK_REALTIME, &now)) + return; + time_add(&next, &now, interval); + + pthread_mutex_lock(&thapi_sampling_mutex); + if 
(!thapi_sampling_events) + goto end; + entry = (struct sampling_entry *)malloc(sizeof(struct sampling_entry)); + if (!entry) + goto end; + entry->pfn = pfn; + entry->interval = *interval; + entry->next = next; + utarray_push_back(thapi_sampling_events, &entry); + utarray_sort(thapi_sampling_events, sampling_entry_cmpw); +end: + pthread_mutex_unlock(&thapi_sampling_mutex); +} diff --git a/sampling/thapi_sampling.h b/sampling/thapi_sampling.h new file mode 100644 index 00000000..2dce7a46 --- /dev/null +++ b/sampling/thapi_sampling.h @@ -0,0 +1,5 @@ +#include + +extern int thapi_sampling_init(); + +extern void thapi_register_sampling(void (*pfn)(void), struct timespec *interval); diff --git a/utils/Makefile.am b/utils/Makefile.am index 00dbba4d..53ba6a9d 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -42,7 +42,9 @@ thapi_metadata_LDFLAGS = $(LTTNG_UST_LIBS) bin_PROGRAMS = thapi_metadata -bin_SCRIPTS = babeltrace_thapi +bin_SCRIPTS = \ + babeltrace_thapi \ + babeltrace_energy .PHONY: version diff --git a/utils/babeltrace_energy.in b/utils/babeltrace_energy.in new file mode 100755 index 00000000..203720ea --- /dev/null +++ b/utils/babeltrace_energy.in @@ -0,0 +1,138 @@ +#!/usr/bin/env ruby +DATADIR = File.join("@prefix@", "share") +$:.unshift(DATADIR) if File::directory?(DATADIR) +require 'optparse' +require 'babeltrace2' +require 'find' +require 'yaml' +require 'pp' + +$event_lambdas = {} +#require 'babeltrace_opencl_lib' +#require 'babeltrace_ze_lib' +#require 'babeltrace_cuda_lib' +#require 'babeltrace_omp_lib' + +$options = { + context: false, + restrict: false, + live: false +} + +OptionParser.new do |opts| + opts.banner = "Usage: babeltrace_energy [OPTIONS] target_trace_directory" # printed by -h/--help via `puts opts` + + opts.on("-c", "--[no-]context", "Add context information") do |context| + $options[:context] = context + end + + opts.on("-r", "--[no-]restrict", "Restrict output to recognized events") do |restrict| + $options[:restrict] = restrict + end + + opts.on("-h", "--help", "Prints 
this help") do + puts opts + exit + end + + opts.on("--live", "Enable live display of the trace") do + $options[:live] = true + end + + opts.on("-v", "--version", "Print the version string") do + puts File.read(File.join(DATADIR, "version")) + exit + end + +end.parse! + +$restrict = $options[:restrict] +$context = $options[:context] +$live = $options[:live] + +ctf_fs = BT2::BTPlugin.find("ctf").get_source_component_class_by_name("fs") +ctf_lttng_live = BT2::BTPlugin.find("ctf").get_source_component_class_by_name("lttng-live") +utils_muxer = BT2::BTPlugin.find("utils").get_filter_component_class_by_name("muxer") +text_pretty = BT2::BTPlugin.find("text").get_sink_component_class_by_name("pretty") + +if !$live + trace_locations = Find.find(*ARGV).reject { |path| + FileTest.directory?(path) + }.select { |path| + File.basename(path) == "metadata" + }.collect { |path| + File.dirname(path) + }.select { |path| + qe = BT2::BTQueryExecutor.new( component_class: ctf_fs, object_name: "babeltrace.support-info", params: { "input" => path, "type" => "directory" } ) + qe.query.value["weight"] > 0.5 + } +else + trace_locations = ARGV +end +raise "Could not find lttng trace" if trace_locations.size == 0 +$energies={} +$event_lambdas["lttng_ust_ze_sampling:gpu_energy"] = lambda { |event| + defi=event.payload_field.value + device = defi['hDevice'] + domain = defi['domain'] + energy = defi['energy'] + timestamp = defi['timestamp'] + key = device, domain + previous = $energies[key] + if previous + p_energy, p_timestamp = previous + puts "#{key[0]}:#{key[1]}: #{(energy - p_energy).to_f/(timestamp - p_timestamp)}" + end + $energies[key] = [energy, timestamp] +} + +consume = lambda { |iterator, _| + mess = iterator.next_messages + mess.each { |m| + if m.type == :BT_MESSAGE_TYPE_EVENT + + e = m.event + #puts e.name + l = $event_lambdas[e.name] + if l + l.call e +# str = "#{Time.at(0, m.get_default_clock_snapshot.ns_from_origin, :nsec).strftime("%H:%M:%S.%9L")}" +# if $context +# str << " - 
#{e.stream.trace.get_environment_entry_value_by_name("hostname")}" +# common_context_field = e.get_common_context_field +# str << " - " << common_context_field.value.collect { |k, v| "#{k}: #{v}" }.join(", ") if common_context_field +# end +# str << " - #{e.name}: " +# if l +# str << l.call(e.payload_field.value) +# else +# str << e.payload_field.to_s +# end +# puts str + end + end + } +} + +graph = BT2::BTGraph.new +if !$live + comps = trace_locations.each_with_index.collect { |trace_location, i| graph.add_component(ctf_fs, "trace_#{i}", params: {"inputs" => [ trace_location ] }) } +else + comps = trace_locations.each_with_index.collect { |trace_location, i| graph.add_component(ctf_lttng_live, "trace_#{i}", params: {"inputs" => [ trace_location ], "session-not-found-action" => "end" }) } +end +comp2 = graph.add_component(utils_muxer, "mux") +comp3 = graph.add_simple_sink("babeltrace_thapi", consume) +i = 0 +comps.each { |comp| + ops = comp.output_ports + ops.each { |op| + ip = comp2.input_port(i) + i += 1 + graph.connect_ports(op, ip) + } +} + +op = comp2.output_port(0) +ip = comp3.input_port(0) +graph.connect_ports(op, ip) +graph.run diff --git a/utils/babeltrace_thapi.in b/utils/babeltrace_thapi.in index 52ba9d22..794978e4 100755 --- a/utils/babeltrace_thapi.in +++ b/utils/babeltrace_thapi.in @@ -170,9 +170,8 @@ def get_components(names) str = Time.at(0, m.get_default_clock_snapshot.ns_from_origin, :nsec).strftime('%H:%M:%S.%9L').to_s if $options[:context] str << " - #{e.stream.trace.get_environment_entry_value_by_name('hostname')}" - str << ' - ' << e.get_common_context_field.value.collect do |k, v| - "#{k}: #{v}" - end.join(', ') + common_context_field = e.get_common_context_field + str << " - " << common_context_field.value.collect { |k, v| "#{k}: #{v}" }.join(", ") if common_context_field end str << " - #{e.name}: " str << (l ? 
l.call(e.payload_field.value) : e.payload_field.to_s) diff --git a/utils/xprof_utils.cpp b/utils/xprof_utils.cpp index 07f141fc..5527e3eb 100644 --- a/utils/xprof_utils.cpp +++ b/utils/xprof_utils.cpp @@ -20,6 +20,104 @@ thread_id_t borrow_thread_id(const bt_event *event){ return bt_field_integer_unsigned_get_value(field); } + +bt_message* create_power_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, const uint64_t power, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field *device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field,0); + 
bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + // domain + bt_field *domain_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(domain_field, domain); + + // power + bt_field *power_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(power_field, power); + + return message; +} + + +bt_message* create_frequency_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field 
*device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field,0); + bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + // domain + bt_field *domain_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(domain_field, domain); + + // frequency + bt_field *frequency_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(frequency_field, frequency); + + return message; +} + bt_message* create_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index cb65f43f..7136a95d 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -110,6 +110,15 @@ const char *borrow_hostname(const bt_event *); process_id_t borrow_process_id(const bt_event *); thread_id_t borrow_thread_id(const bt_event *); +bt_message* create_power_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, const uint64_t power, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); // event message whose payload is (hDevice, domain, power) + + +bt_message* create_frequency_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); // event message whose payload is (hDevice, domain, frequency); note ts precedes the value here + bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t, const char *name, const uint64_t ts, const uint64_t duration, const 
bool err, bt_event_class *, bt_self_message_iterator *, diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index 31b526b1..f62aac1a 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml @@ -89,3 +89,41 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: lttng:frequency + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: domain + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: frequency + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: lttng:power + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: domain + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: power + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 00a40aef..50ebe915 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -26,6 +26,12 @@ struct timeline_dispatch_s { std::unordered_map, std::map> track2lasts; + + std::unordered_map hp2frqtracks; + std::unordered_map hp2pwrtracks; + std::unordered_map hp_devs2frqtracks; + std::unordered_map hp_devs2pwrtracks; + perfetto_pruned::Trace trace; }; using timeline_dispatch_t = struct timeline_dispatch_s; @@ -36,6 +42,102 @@ static perfetto_uuid_t gen_perfetto_uuid() { return uuid++; } +static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispatch, std::unordered_map &parent_tracks, + const std::string track_name, std::string hostname, uint64_t process_id) { + perfetto_uuid_t hp_uuid = 0; + auto [it, inserted] = parent_tracks.insert({{hostname, process_id}, 
hp_uuid}); + auto &potential_uuid = it->second; + // Exists + if (!inserted) + return potential_uuid; + + hp_uuid = gen_perfetto_uuid(); + potential_uuid = hp_uuid; + + // Create packet with track descriptor + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(0); + packet->set_previous_packet_dropped(true); + auto *track_descriptor = packet->mutable_track_descriptor(); + track_descriptor->set_uuid(hp_uuid); + auto *process = track_descriptor->mutable_process(); + process->set_pid(hp_uuid); + std::ostringstream oss; + oss << "Hostname " << hostname << " | Process " << process_id; + oss << " | " << track_name << " | uuid "; + process->set_process_name(oss.str()); + return hp_uuid; +} + +static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, std::unordered_map &parent_tracks, + std::unordered_map &counter_tracks, const std::string track_name, + std::string hostname, uint64_t process_id, thapi_device_id did) { + perfetto_uuid_t hp_dev_uuid = 0; + auto [it, inserted] = counter_tracks.insert({{hostname, process_id, did}, hp_dev_uuid}); + auto &potential_uuid = it->second; + // Exists + if (!inserted) + return potential_uuid; + + perfetto_uuid_t hp_uuid = get_parent_counter_track_uuid(dispatch, parent_tracks, track_name, hostname, process_id); + hp_dev_uuid = gen_perfetto_uuid(); + potential_uuid = hp_dev_uuid; + + // Create new track + auto *packet = dispatch->trace.add_packet(); + packet->set_timestamp(0); + packet->set_trusted_packet_sequence_id(10000); + auto *track_descriptor = packet->mutable_track_descriptor(); + track_descriptor->set_uuid(hp_dev_uuid); + track_descriptor->set_parent_uuid(hp_uuid); + std::ostringstream oss; + oss << "Device " << did; + track_descriptor->set_name(oss.str()); + track_descriptor->mutable_counter(); + return hp_dev_uuid; +} + +static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t 
process_id, thapi_device_id did) { + return get_counter_track_uuuid(dispatch, dispatch->hp2frqtracks, dispatch->hp_devs2frqtracks, "GPU Frequency", hostname, process_id, did); +} + +static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, thapi_device_id did) { + return get_counter_track_uuuid(dispatch, dispatch->hp2pwrtracks, dispatch->hp_devs2pwrtracks, "GPU Power", hostname, process_id, did); +} + +static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t domain, uint64_t timestamp, uint64_t frequency) { + (void)domain; + perfetto_uuid_t track_uuid = get_frequency_track_uuuid(dispatch, hostname, process_id, did); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); + track_event->set_track_uuid(track_uuid); + track_event->set_name("Frequency"); + track_event->set_counter_value(frequency); +} + +static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t domain, uint64_t timestamp, uint64_t power) { + (void)domain; + perfetto_uuid_t track_uuid = get_power_track_uuuid(dispatch, hostname, process_id, did); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); + track_event->set_track_uuid(track_uuid); + track_event->set_name("Power"); + track_event->set_counter_value(power); +} + static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin, std::string name) { auto *packet = 
dispatch->trace.add_packet(); @@ -239,9 +341,25 @@ static void device_usr_callback(void *btx_handle, void *usr_data, const char *ho add_event_async(dispatch, hostname, vpid, vtid, did, sdid, name, ts, dur); } +static void frequency_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t domain, uint64_t frequency) { + auto *dispatch = static_cast(usr_data); + add_event_frequency(dispatch, hostname, vpid, vtid, did, domain, ts, frequency); +} + +static void power_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t domain, uint64_t power) { + auto *dispatch = static_cast(usr_data); + add_event_power(dispatch, hostname, vpid, vtid, did, domain, ts, power); +} + void btx_register_usr_callbacks(void *btx_handle) { btx_register_callbacks_lttng_host(btx_handle, &host_usr_callback); btx_register_callbacks_lttng_device(btx_handle, &device_usr_callback); + btx_register_callbacks_lttng_frequency(btx_handle, &frequency_usr_callback); + btx_register_callbacks_lttng_power(btx_handle, &power_usr_callback); btx_register_callbacks_initialize_usr_data(btx_handle, &btx_initialize_usr_data); btx_register_callbacks_finalize_usr_data(btx_handle, &btx_finalize_usr_data); } diff --git a/xprof/interval.c.erb b/xprof/interval.c.erb index 3412390f..8e09ef4d 100644 --- a/xprof/interval.c.erb +++ b/xprof/interval.c.erb @@ -97,6 +97,8 @@ bt_component_class_initialize_method_status <%= namespace %>_dispatch_initialize dispatch->device_event_class = create_lttng_device_event_class_message(trace_class, stream_class); dispatch->traffic_event_class = create_lttng_traffic_event_class_message(trace_class, stream_class); dispatch->device_name_event_class = create_lttng_device_name_event_class_message(trace_class, stream_class); + dispatch->frequency_event_class = 
create_lttng_frequency_event_class_message(trace_class, stream_class); + dispatch->power_event_class = create_lttng_power_event_class_message(trace_class, stream_class); /* Create a default trace from (instance of `trace_class`) */ bt_trace *trace = bt_trace_create(trace_class); diff --git a/xprof/interval.h.erb b/xprof/interval.h.erb index ce94241f..2ebdb184 100644 --- a/xprof/interval.h.erb +++ b/xprof/interval.h.erb @@ -56,7 +56,8 @@ struct <%= namespace %>_dispatch { bt_event_class *device_event_class; bt_event_class *traffic_event_class; bt_event_class *device_name_event_class; - + bt_event_class *frequency_event_class; + bt_event_class *power_event_class; /* Component's input port (weak) */ bt_self_component_port_input *in_port; }; diff --git a/xprof/interval_model.yaml b/xprof/interval_model.yaml index 098bb85e..0a9df04a 100644 --- a/xprof/interval_model.yaml +++ b/xprof/interval_model.yaml @@ -53,4 +53,28 @@ - :name: name :class: string - :name: size - :class: unsigned + :class: unsigned +- :name: lttng:frequency + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: domain + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: frequency + :class: unsigned +- :name: lttng:power + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: domain + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: power + :class: unsigned diff --git a/xprof/perfetto_prunned.proto b/xprof/perfetto_prunned.proto index 34719c2f..c20070fb 100644 --- a/xprof/perfetto_prunned.proto +++ b/xprof/perfetto_prunned.proto @@ -1,7 +1,46 @@ +// AUTOGENERATED - DO NOT EDIT +// --------------------------- +// This file has been generated by +// AOSP://external/perfetto/tools/gen_merged_protos +// merging the perfetto config protos. +// This fused proto is intended to be copied in: +// - Android tree, for statsd. +// - Google internal repos. 
+ syntax = "proto2"; -// "There Is a Light That Never Goes Out" -// https://android.googlesource.com/platform/external/perfetto/+/refs/heads/master/protos/perfetto/trace/ + package perfetto_pruned; +// Begin of protos/perfetto/common/track_event_descriptor.proto + +message TrackEventCategory { + optional string name = 1; + optional string description = 2; + repeated string tags = 3; +} + +message TrackEventDescriptor { + repeated TrackEventCategory available_categories = 1; +} + +// End of protos/perfetto/common/track_event_descriptor.proto + +// Begin of protos/perfetto/common/data_source_descriptor.proto + +// This message is sent from Producer(s) to the tracing Service when registering +// to advertise their capabilities. It describes the structure of tracing +// protos that will be produced by the data source and the supported filters. +message DataSourceDescriptor { + optional TrackEventDescriptor track_event_descriptor = 6 [lazy = true]; + +} + + +// Reports the state of the tracing service. Used to gather details about the +// data sources connected. +// See ConsumerPort::QueryServiceState(). +// End of protos/perfetto/common/tracing_service_state.proto + +// Begin of protos/perfetto/common/builtin_clock.proto enum BuiltinClock { BUILTIN_CLOCK_UNKNOWN = 0; @@ -12,65 +51,1100 @@ enum BuiltinClock { BUILTIN_CLOCK_MONOTONIC_RAW = 5; BUILTIN_CLOCK_BOOTTIME = 6; BUILTIN_CLOCK_MAX_ID = 63; - reserved 7, 8, 9; + + reserved 7, 8; + + // An internal CL (ag/16521245) has taken this for BUILTIN_CLOCK_TSC. + // That might get upstreamed later on. Avoid diverging on this ID in future. + reserved 9; +} + +message TrackEventConfig { + // The following fields define the set of enabled trace categories. Each list + // item is a glob. + // + // To determine if category is enabled, it is checked against the filters in + // the following order: + // + // 1. Exact matches in enabled categories. + // 2. Exact matches in enabled tags. + // 3. Exact matches in disabled categories. 
+ // 4. Exact matches in disabled tags. + // 5. Pattern matches in enabled categories. + // 6. Pattern matches in enabled tags. + // 7. Pattern matches in disabled categories. + // 8. Pattern matches in disabled tags. + // + // If none of the steps produced a match, the category is enabled by default. + // + // Examples: + // + // - To enable all non-slow/debug categories: + // + // No configuration needed, happens by default. + // + // - To enable a specific category: + // + // disabled_categories = ["*"] + // enabled_categories = ["my_category"] + // + // - To enable only categories with a specific tag: + // + // disabled_tags = ["*"] + // enabled_tags = ["my_tag"] + // + + // Default: [] + repeated string disabled_categories = 1; + + // Default: [] + repeated string enabled_categories = 2; + + // Default: ["slow", "debug"] + repeated string disabled_tags = 3; + + // Default: [] + repeated string enabled_tags = 4; + + // Default: false (i.e. enabled by default) + optional bool disable_incremental_timestamps = 5; + + // Allows to specify a custom unit different than the default (ns). + // Also affects thread timestamps if enable_thread_time_sampling = true. + // A multiplier of 1000 means that a timestamp = 3 should be interpreted as + // 3000 ns = 3 us. + // Default: 1 (if unset, it should be read as 1). + optional uint64 timestamp_unit_multiplier = 6; + +} + +// End of protos/perfetto/config/track_event/track_event_config.proto + +// Begin of protos/perfetto/config/data_source_config.proto + +// The configuration that is passed to each data source when starting tracing. +// Next id: 124 +message DataSourceConfig { + // Data source unique name, e.g., "linux.ftrace". This must match + // the name passed by the data source when it registers (see + // RegisterDataSource()). 
+ optional string name = 1; + + // Data source name: track_event + optional TrackEventConfig track_event_config = 113 [lazy = true]; + // optional string legacy_config = 1000; + + // This field is only used for testing. + //optional TestConfig for_testing = 1001; + + // Was |for_testing|. Caused more problems then found. + reserved 268435455; +} + +// End of protos/perfetto/config/data_source_config.proto + +// Begin of protos/perfetto/config/trace_config.proto + +// The overall config that is used when starting a new tracing session through +// ProducerPort::StartTracing(). +// It contains the general config for the logging buffer(s) and the configs for +// all the data source being enabled. +// +// Next id: 38. +message TraceConfig { + message BufferConfig { + optional uint32 size_kb = 1; + + // |page_size|, now deprecated. + reserved 2; + + // |optimize_for|, now deprecated. + reserved 3; + + enum FillPolicy { + UNSPECIFIED = 0; + + // Default behavior. The buffer operates as a conventional ring buffer. + // If the writer is faster than the reader (or if the reader reads only + // after tracing is stopped) newly written packets will overwrite old + // packets. + RING_BUFFER = 1; + + // Behaves like RING_BUFFER as long as there is space in the buffer or + // the reader catches up with the writer. As soon as the writer hits + // an unread chunk, it stops accepting new data in the buffer. + DISCARD = 2; + } + optional FillPolicy fill_policy = 4; + } + repeated BufferConfig buffers = 1; + + // An identifier clients can use to tie this trace to other logging. + // DEPRECATED as per v32. See TracePacket.trace_uuid for the authoritative + // Trace UUID. If this field is set, the tracing service will respect the + // requested UUID (i.e. TracePacket.trace_uuid == this field) but only if + // gap-less snapshotting is not used. 
+ optional int64 trace_uuid_msb = 27 [deprecated = true]; + optional int64 trace_uuid_lsb = 28 [deprecated = true]; + + // When set applies a post-filter to the trace contents using the filter + // provided. The filter is applied at ReadBuffers() time and works both in the + // case of IPC readback and write_into_file. This filter can be generated + // using `tools/proto_filter -s schema.proto -F filter_out.bytes` or + // `-T filter_out.escaped_string` (for .pbtx). See go/trace-filtering for + // design. + // + // Introduced in Android S, but it was broken (b/195065199). Reintroduced in + // Android T with a different field number. Updated in Android U with a new + // bytecode version which supports string filtering. + // Android-only. Not for general use. If set, reports the trace to the + // Android framework. This field is read by perfetto_cmd, rather than the + // tracing service. This field must be set when passing the --upload flag to + // perfetto_cmd. + +} +// A snapshot of clock readings to allow for trace alignment. +message ClockSnapshot { + message Clock { + // DEPRECATED. This enum has moved to ../common/builtin_clock.proto. + enum BuiltinClocks { + UNKNOWN = 0; + REALTIME = 1; + REALTIME_COARSE = 2; + MONOTONIC = 3; + MONOTONIC_COARSE = 4; + MONOTONIC_RAW = 5; + BOOTTIME = 6; + BUILTIN_CLOCK_MAX_ID = 63; + + reserved 7, 8; + } + + // Clock IDs have the following semantic: + // [1, 63]: Builtin types, see BuiltinClock from + // ../common/builtin_clock.proto. + // [64, 127]: User-defined clocks. These clocks are sequence-scoped. They + // are only valid within the same |trusted_packet_sequence_id| + // (i.e. only for TracePacket(s) emitted by the same TraceWriter + // that emitted the clock snapshot). + // [128, MAX]: Reserved for future use. The idea is to allow global clock + // IDs and setting this ID to hash(full_clock_name) & ~127. + optional uint32 clock_id = 1; + + // Absolute timestamp. 
Unit is ns unless specified otherwise by the + // unit_multiplier_ns field below. + optional uint64 timestamp = 2; + + // When true each TracePacket's timestamp should be interpreted as a delta + // from the last TracePacket's timestamp (referencing this clock) emitted by + // the same packet_sequence_id. Should only be used for user-defined + // sequence-local clocks. The first packet timestamp after each + // ClockSnapshot that contains this clock is relative to the |timestamp| in + // the ClockSnapshot. + optional bool is_incremental = 3; + + // Allows to specify a custom unit different than the default (ns) for this + // clock domain. A multiplier of 1000 means that a timestamp = 3 should be + // interpreted as 3000 ns = 3 us. All snapshots for the same clock within a + // trace need to use the same unit. + optional uint64 unit_multiplier_ns = 4; + } + repeated Clock clocks = 1; + + // The authoritative clock domain for the trace. Defaults to BOOTTIME, but can + // be overridden in TraceConfig's builtin_data_sources. Trace processor will + // attempt to translate packet/event timestamps from various data sources (and + // their chosen clock domains) to this domain during import. + optional BuiltinClock primary_trace_clock = 2; +} + +// End of protos/perfetto/trace/clock_snapshot.proto + +// Begin of protos/perfetto/common/descriptor.proto + +// The protocol compiler can output a FileDescriptorSet containing the .proto +// files it parses. + + + +// Describes a message type. +message DescriptorProto { + optional string name = 1; + + repeated FieldDescriptorProto field = 2; + repeated FieldDescriptorProto extension = 6; + + repeated DescriptorProto nested_type = 3; + repeated EnumDescriptorProto enum_type = 4; + + reserved 5; + + repeated OneofDescriptorProto oneof_decl = 8; + + reserved 7; + + // Range of reserved tag numbers. Reserved tag numbers may not be used by + // fields or extension ranges in the same message. Reserved ranges may + // not overlap. 
+ message ReservedRange { + // Inclusive. + optional int32 start = 1; + // Exclusive. + optional int32 end = 2; + } + repeated ReservedRange reserved_range = 9; + // Reserved field names, which may not be used by fields in the same message. + // A given name may only be reserved once. + repeated string reserved_name = 10; +} + +message FieldOptions { + // The packed option can be enabled for repeated primitive fields to enable + // a more efficient representation on the wire. Rather than repeatedly + // writing the tag and type for each element, the entire array is encoded as + // a single length-delimited blob. In proto3, only explicit setting it to + // false will avoid using packed encoding. + optional bool packed = 2; +} + +// Describes a field within a message. +message FieldDescriptorProto { + enum Type { + // 0 is reserved for errors. + // Order is weird for historical reasons. + TYPE_DOUBLE = 1; + TYPE_FLOAT = 2; + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if + // negative values are likely. + TYPE_INT64 = 3; + TYPE_UINT64 = 4; + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if + // negative values are likely. + TYPE_INT32 = 5; + TYPE_FIXED64 = 6; + TYPE_FIXED32 = 7; + TYPE_BOOL = 8; + TYPE_STRING = 9; + // Tag-delimited aggregate. + // Group type is deprecated and not supported in proto3. However, Proto3 + // implementations should still be able to parse the group wire format and + // treat group fields as unknown fields. + TYPE_GROUP = 10; + // Length-delimited aggregate. + TYPE_MESSAGE = 11; + + // New in version 2. + TYPE_BYTES = 12; + TYPE_UINT32 = 13; + TYPE_ENUM = 14; + TYPE_SFIXED32 = 15; + TYPE_SFIXED64 = 16; + // Uses ZigZag encoding. + TYPE_SINT32 = 17; + // Uses ZigZag encoding. 
+ TYPE_SINT64 = 18; + }; + + enum Label { + // 0 is reserved for errors + LABEL_OPTIONAL = 1; + LABEL_REQUIRED = 2; + LABEL_REPEATED = 3; + }; + + optional string name = 1; + optional int32 number = 3; + optional Label label = 4; + + // If type_name is set, this need not be set. If both this and type_name + // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP. + optional Type type = 5; + + // For message and enum types, this is the name of the type. If the name + // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping + // rules are used to find the type (i.e. first the nested types within this + // message are searched, then within the parent, on up to the root + // namespace). + optional string type_name = 6; + + // For extensions, this is the name of the type being extended. It is + // resolved in the same manner as type_name. + optional string extendee = 2; + + // For numeric types, contains the original text representation of the value. + // For booleans, "true" or "false". + // For strings, contains the default text contents (not escaped in any way). + // For bytes, contains the C escaped value. All bytes >= 128 are escaped. + // TODO(kenton): Base-64 encode? + optional string default_value = 7; + + optional FieldOptions options = 8; + + // If set, gives the index of a oneof in the containing type's oneof_decl + // list. This field is a member of that oneof. + optional int32 oneof_index = 9; + + reserved 10; +} + +// Describes a oneof. +message OneofDescriptorProto { + optional string name = 1; + optional OneofOptions options = 2; +} + +// Describes an enum type. +message EnumDescriptorProto { + optional string name = 1; + + repeated EnumValueDescriptorProto value = 2; + + reserved 3; + reserved 4; + + // Reserved enum value names, which may not be reused. A given name may only + // be reserved once. + repeated string reserved_name = 5; +} + +// Describes a value within an enum. 
+message EnumValueDescriptorProto { + optional string name = 1; + optional int32 number = 2; + + reserved 3; +} + +message OneofOptions { + reserved 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +// End of protos/perfetto/common/descriptor.proto + +// Begin of protos/perfetto/trace/extension_descriptor.proto + +// This message contains descriptors used to parse extension fields of +// TrackEvent. +// +// See docs/design-docs/extensions.md for more details. +// Trace events emitted by client instrumentation library (TRACE_EVENT macros), +// which describe activity on a track, such as a thread or asynchronous event +// track. The track is specified using separate TrackDescriptor messages and +// referred to via the track's UUID. +// +// A simple TrackEvent packet specifies a timestamp, category, name and type: +// ```protobuf +// trace_packet { +// timestamp: 1000 +// track_event { +// categories: ["my_cat"] +// name: "my_event" +// type: TYPE_INSTANT +// } +// } +// ``` +// +// To associate an event with a custom track (e.g. a thread), the track is +// defined in a separate packet and referred to from the TrackEvent by its UUID: +// ```protobuf +// trace_packet { +// track_descriptor { +// track_uuid: 1234 +// name: "my_track" +// +// // Optionally, associate the track with a thread. +// thread_descriptor { +// pid: 10 +// tid: 10 +// .. +// } +// } +// } +// ``` +// +// A pair of TYPE_SLICE_BEGIN and _END events form a slice on the track: +// +// ```protobuf +// trace_packet { +// timestamp: 1200 +// track_event { +// track_uuid: 1234 +// categories: ["my_cat"] +// name: "my_slice" +// type: TYPE_SLICE_BEGIN +// } +// } +// trace_packet { +// timestamp: 1400 +// track_event { +// track_uuid: 1234 +// type: TYPE_SLICE_END +// } +// } +// ``` +// TrackEvents also support optimizations to reduce data repetition and encoded +// data size, e.g. through data interning (names, categories, ...) 
and delta +// encoding of timestamps/counters. For details, see the InternedData message. +// Further, default values for attributes of events on the same sequence (e.g. +// their default track association) can be emitted as part of a +// TrackEventDefaults message. +// +// Next reserved id: 13 (up to 15). Next id: 50. +message TrackEvent { + // Names of categories of the event. In the client library, categories are a + // way to turn groups of individual events on or off. + // interned EventCategoryName. + repeated uint64 category_iids = 3; + // non-interned variant. + repeated string categories = 22; + + // Optional name of the event for its display in trace viewer. May be left + // unspecified for events with typed arguments. + // + // Note that metrics should not rely on event names, as they are prone to + // changing. Instead, they should use typed arguments to identify the events + // they are interested in. + oneof name_field { + // interned EventName. + uint64 name_iid = 10; + // non-interned variant. + string name = 23; + } + + // TODO(eseckler): Support using binary symbols for category/event names. + + // Type of the TrackEvent (required if |phase| in LegacyEvent is not set). + enum Type { + TYPE_UNSPECIFIED = 0; + + // Slice events are events that have a begin and end timestamp, i.e. a + // duration. They can be nested similar to a callstack: If, on the same + // track, event B begins after event A, but before A ends, B is a child + // event of A and will be drawn as a nested event underneath A in the UI. + // Note that child events should always end before their parents (e.g. B + // before A). + // + // Each slice event is formed by a pair of BEGIN + END events. The END event + // does not need to repeat any TrackEvent fields it has in common with its + // corresponding BEGIN event. Arguments and debug annotations of the BEGIN + + // END pair will be merged during trace import. 
+ // + // Note that we deliberately chose not to support COMPLETE events (which + // would specify a duration directly) since clients would need to delay + // writing them until the slice is completed, which can result in reordered + // events in the trace and loss of unfinished events at the end of a trace. + TYPE_SLICE_BEGIN = 1; + TYPE_SLICE_END = 2; + + // Instant events are nestable events without duration. They can be children + // of slice events on the same track. + TYPE_INSTANT = 3; + + // Event that provides a value for a counter track. |track_uuid| should + // refer to a counter track and |counter_value| set to the new value. Note + // that most other TrackEvent fields (e.g. categories, name, ..) are not + // supported for TYPE_COUNTER events. See also CounterDescriptor. + TYPE_COUNTER = 4; + } + optional Type type = 9; + + // Identifies the track of the event. The default value may be overridden + // using TrackEventDefaults, e.g., to specify the track of the TraceWriter's + // sequence (in most cases sequence = one thread). If no value is specified + // here or in TrackEventDefaults, the TrackEvent will be associated with an + // implicit trace-global track (uuid 0). See TrackDescriptor::uuid. + optional uint64 track_uuid = 11; + + // A new value for a counter track. |track_uuid| should refer to a track with + // a CounterDescriptor, and |type| should be TYPE_COUNTER. For a more + // efficient encoding of counter values that are sampled at the beginning/end + // of a slice, see |extra_counter_values| and |extra_counter_track_uuids|. + // Counter values can optionally be encoded in as delta values (positive or + // negative) on each packet sequence (see CounterIncrementalBase). + oneof counter_value_field { + int64 counter_value = 30; + double double_counter_value = 44; + } + + // To encode counter values more efficiently, we support attaching additional + // counter values to a TrackEvent of any type. 
All values will share the same + // timestamp specified in the TracePacket. The value at + // extra_counter_values[N] is for the counter track referenced by + // extra_counter_track_uuids[N]. + // + // |extra_counter_track_uuids| may also be set via TrackEventDefaults. There + // should always be equal or more uuids than values. It is valid to set more + // uuids (e.g. via defaults) than values. If uuids are specified in + // TrackEventDefaults and a TrackEvent, the TrackEvent uuids override the + // default uuid list. + // + // For example, this allows snapshotting the thread time clock at each + // thread-track BEGIN and END event to capture the cpu time delta of a slice. + repeated uint64 extra_counter_track_uuids = 31; + repeated int64 extra_counter_values = 12; + + // Counter snapshots using floating point instead of integer values. + repeated uint64 extra_double_counter_track_uuids = 45; + repeated double extra_double_counter_values = 46; + + // --------------------------------------------------------------------------- + // TrackEvent arguments: + // --------------------------------------------------------------------------- + + // This field is used only if the source location represents the function that + // executes during this event. + + // Extension range for future use. + extensions 1000 to 9899; + // Reserved for Perfetto unit and integration tests. + extensions 9900 to 10000; + + // --------------------------------------------------------------------------- + // Deprecated / legacy event fields, which will be removed in the future: + // --------------------------------------------------------------------------- + + // Deprecated. Use the |timestamp| and |timestamp_clock_id| fields in + // TracePacket instead. + // + // Timestamp in microseconds (usually CLOCK_MONOTONIC). + oneof timestamp { + // Delta timestamp value since the last TrackEvent or ThreadDescriptor. 
To + // calculate the absolute timestamp value, sum up all delta values of the + // preceding TrackEvents since the last ThreadDescriptor and add the sum to + // the |reference_timestamp| in ThreadDescriptor. This value should always + // be positive. + int64 timestamp_delta_us = 1; + // Absolute value (e.g. a manually specified timestamp in the macro). + // This is a one-off value that does not affect delta timestamp computation + // in subsequent TrackEvents. + int64 timestamp_absolute_us = 16; + } + + // Deprecated. Use |extra_counter_values| and |extra_counter_track_uuids| to + // encode thread time instead. + // + // CPU time for the current thread (e.g., CLOCK_THREAD_CPUTIME_ID) in + // microseconds. + +// Next id: 20. + +} + +// Default values for fields of all TrackEvents on the same packet sequence. +// Should be emitted as part of TracePacketDefaults whenever incremental state +// is cleared. It's defined here because field IDs should match those of the +// corresponding fields in TrackEvent. +message TrackEventDefaults { + optional uint64 track_uuid = 11; + repeated uint64 extra_counter_track_uuids = 31; + repeated uint64 extra_double_counter_track_uuids = 45; + + // TODO(eseckler): Support default values for more TrackEvent fields. +} + +// -------------------- +// Interned data types: +// -------------------- + +message EventCategory { + optional uint64 iid = 1; + optional string name = 2; +} + +message EventName { + optional uint64 iid = 1; + optional string name = 2; +} + +// Begin of protos/perfetto/trace/ps/process_tree.proto + +// Metadata about the processes and threads in the trace. +// Note: this proto was designed to be filled in by traced_probes and should +// only be populated with accurate information coming from the system. Other +// trace writers should prefer to fill ThreadDescriptor and ProcessDescriptor +// in TrackDescriptor. +message ProcessTree { + // Representation of a thread. 
+ message Thread { + // The thread ID (as per gettid()) in the root PID namespace. + optional int32 tid = 1; + + // Thread group id (i.e. the PID of the process, == TID of the main thread) + optional int32 tgid = 3; + + // The name of the thread. + optional string name = 2; + + // The non-root-level thread IDs if the thread runs in a PID namespace. Read + // from the NSpid entry of /proc//status, with the first element (root- + // level thread ID) omitted. + repeated int32 nstid = 4; + } + + // Representation of a process. + message Process { + // The UNIX process ID, aka thread group ID (as per getpid()) in the root + // PID namespace. + optional int32 pid = 1; + + // The parent process ID, as per getppid(). + optional int32 ppid = 2; + + // The command line for the process, as per /proc/pid/cmdline. + // If it is a kernel thread there will only be one cmdline field + // and it will contain /proc/pid/comm. + repeated string cmdline = 3; + + // No longer used as of Apr 2018, when the dedicated |threads| field was + // introduced in ProcessTree. + repeated Thread threads_deprecated = 4 [deprecated = true]; + + // The uid for the process, as per /proc/pid/status. + optional int32 uid = 5; + + // The non-root-level process IDs if the process runs in a PID namespace. + // Read from the NSpid entry of /proc//status, with the first element ( + // root-level process ID) omitted. + repeated int32 nspid = 6; + } + + // List of processes and threads in the client. These lists are incremental + // and not exhaustive. A process and its threads might show up separately in + // different ProcessTree messages. 
A thread might not even show up at all, if + // no sched_switch activity was detected, for instance: + // #0 { processes: [{pid: 10, ...}], threads: [{pid: 11, tgid: 10}] } + // #1 { threads: [{pid: 12, tgid: 10}] } + // #2 { processes: [{pid: 20, ...}], threads: [{pid: 13, tgid: 10}] } + repeated Process processes = 1; + repeated Thread threads = 2; + + // The time at which we finish collecting this process tree; + // the top-level packet timestamp is the time at which + // we begin collection. + optional uint64 collection_end_timestamp = 3; } +message TracePacketDefaults { + optional uint32 timestamp_clock_id = 58; + + // Default values for TrackEvents (e.g. default track). + optional TrackEventDefaults track_event_defaults = 11; + + // Defaults for perf profiler packets (PerfSample). + // optional PerfSampleDefaults perf_sample_defaults = 12; +} +// End of protos/perfetto/trace/trace_packet_defaults.proto + +// Begin of protos/perfetto/trace/trace_uuid.proto + +// A random unique ID that identifies the trace. +// This message has been introduced in v32. Prior to that, the UUID was +// only (optionally) present in the TraceConfig.trace_uuid_msb/lsb fields. +// This has been moved to a standalone packet to deal with new use-cases for +// go/gapless-aot, where the same tracing session can be serialized several +// times, in which case the UUID is changed on each snapshot and does not match +// the one in the TraceConfig. +message TraceUuid { + optional int64 msb = 1; + optional int64 lsb = 2; +} + +// End of protos/perfetto/trace/trace_uuid.proto + +// Begin of protos/perfetto/trace/track_event/process_descriptor.proto + +// Describes a process's attributes. Emitted as part of a TrackDescriptor, +// usually by the process's main thread. +// +// Next id: 9. message ProcessDescriptor { optional int32 pid = 1; + repeated string cmdline = 2; optional string process_name = 6; + + optional int32 process_priority = 5; + // Process start time in nanoseconds.
+ // The timestamp refers to the trace clock by default. Other clock IDs + // provided in TracePacket are not supported. + optional int64 start_timestamp_ns = 7; + + // Labels can be used to further describe properties of the work performed by + // the process. For example, these can be used by Chrome renderer process to + // provide titles of frames being rendered. + repeated string process_labels = 8; } -message TracePacketDefaults { - optional uint32 timestamp_clock_id = 58; +// End of protos/perfetto/trace/track_event/process_descriptor.proto + +// Begin of protos/perfetto/trace/track_event/range_of_interest.proto + +// This message specifies the "range of interest" for track events. With the +// `drop_track_event_data_before` option set to `kTrackEventRangeOfInterest`, +// Trace Processor drops track events outside of this range. +message TrackEventRangeOfInterest { + optional int64 start_us = 1; } +// End of protos/perfetto/trace/track_event/range_of_interest.proto + +// Begin of protos/perfetto/trace/track_event/thread_descriptor.proto +// Describes a thread's attributes. Emitted as part of a TrackDescriptor, +// usually by the thread's trace writer. +// +// Next id: 9. message ThreadDescriptor { optional int32 pid = 1; optional int32 tid = 2; + optional string thread_name = 5; + + // --------------------------------------------------------------------------- + // Deprecated / legacy fields, which will be removed in the future: + // --------------------------------------------------------------------------- + + optional int64 reference_timestamp_us = 6; + + // Absolute reference values. Clock values in subsequent TrackEvents can be + // encoded accumulatively and relative to these. This reduces their var-int + // encoding size. + // TODO(eseckler): Deprecated. Replace these with ClockSnapshot encoding. + optional int64 reference_thread_time_us = 7; + optional int64 reference_thread_instruction_count = 8; + + // To support old UI. 
New UI should determine default sorting by thread_type. + optional int32 legacy_sort_index = 3; } +message CounterDescriptor { + // Built-in counters, usually with special meaning in the client library, + // trace processor, legacy JSON format, or UI. Trace processor will infer a + // track name from the enum value if none is provided in TrackDescriptor. + enum BuiltinCounterType { + COUNTER_UNSPECIFIED = 0; + + // Thread-scoped counters. The thread's track should be specified via + // |parent_uuid| in the TrackDescriptor for such a counter. + + // implies UNIT_TIME_NS. + COUNTER_THREAD_TIME_NS = 1; + + // implies UNIT_COUNT. + COUNTER_THREAD_INSTRUCTION_COUNT = 2; + } + + // Type of the values for the counters - to supply lower granularity units, + // see also |unit_multiplier|. + enum Unit { + UNIT_UNSPECIFIED = 0; + UNIT_TIME_NS = 1; + UNIT_COUNT = 2; + UNIT_SIZE_BYTES = 3; + // TODO(eseckler): Support more units as necessary. + } + + // For built-in counters (e.g. thread time). Custom user-specified counters + // (e.g. those emitted by TRACE_COUNTER macros of the client library) + // shouldn't set this, and instead provide a counter name via TrackDescriptor. + optional BuiltinCounterType type = 1; + + // Names of categories of the counter (usually for user-specified counters). + // In the client library, categories are a way to turn groups of individual + // counters (or events) on or off. + repeated string categories = 2; + + // Type of the counter's values. Built-in counters imply a value for this + // field. + optional Unit unit = 3; + + // In order to use a unit not defined as a part of |Unit|, a free-form unit + // name can be used instead. + optional string unit_name = 6; + + // Multiplication factor of this counter's values, e.g. to supply + // COUNTER_THREAD_TIME_NS timestamps in microseconds instead. + optional int64 unit_multiplier = 4; + + // Whether values for this counter are provided as delta values. 
Only + // supported for counters that are emitted on a single packet-sequence (e.g. + // thread time). Counter values in subsequent packets on the current packet + // sequence will be interpreted as delta values from the sequence's most + // recent value for the counter. When incremental state is cleared, the + // counter value is considered to be reset to 0. Thus, the first value after + // incremental state is cleared is effectively an absolute value. + optional bool is_incremental = 5; + + // TODO(eseckler): Support arguments describing the counter (?). + // repeated DebugAnnotation debug_annotations; +} +// As a fallback, TrackEvents emitted without an explicit track association will +// be associated with an implicit trace-global track (uuid = 0), see also +// |TrackEvent::track_uuid|. It is possible but not necessary to emit a +// TrackDescriptor for this implicit track. +// +// Next id: 10. message TrackDescriptor { + // Unique ID that identifies this track. This ID is global to the whole trace. + // Producers should ensure that it is unlikely to clash with IDs emitted by + // other producers. A value of 0 denotes the implicit trace-global track. + // + // For example, legacy TRACE_EVENT macros may use a hash involving the async + // event id + id_scope, pid, and/or tid to compute this ID. optional uint64 uuid = 1; + + // A parent track reference can be used to describe relationships between + // tracks. For example, to define an asynchronous track which is scoped to a + // specific process, specify the uuid for that process's process track here. + // Similarly, to associate a COUNTER_THREAD_TIME_NS counter track with a + // thread, specify the uuid for that thread's thread track here. optional uint64 parent_uuid = 5; + + // Name of the track. Optional - if unspecified, it may be derived from the + // process/thread name (process/thread tracks), the first event's name (async + // tracks), or counter name (counter tracks). 
optional string name = 2; + + // Associate the track with a process, making it the process-global track. + // There should only be one such track per process (usually for instant + // events; trace processor uses this fact to detect pid reuse). If you need + // more (e.g. for asynchronous events), create child tracks using parent_uuid. + // + // Trace processor will merge events on a process track with slice-type events + // from other sources (e.g. ftrace) for the same process into a single + // timeline view. optional ProcessDescriptor process = 3; + // optional ChromeProcessDescriptor chrome_process = 6; + + // Associate the track with a thread, indicating that the track's events + // describe synchronous code execution on the thread. There should only be one + // such track per thread (trace processor uses this fact to detect tid reuse). + // + // Trace processor will merge events on a thread track with slice-type events + // from other sources (e.g. ftrace) for the same thread into a single timeline + // view. optional ThreadDescriptor thread = 4; + // optional ChromeThreadDescriptor chrome_thread = 7; + + // Descriptor for a counter track. If set, the track will only support + // TYPE_COUNTER TrackEvents (and values provided via TrackEvent's + // |extra_counter_values|). + optional CounterDescriptor counter = 8; + + // If true, forces Trace Processor to use separate tracks for track events + // and system events for the same thread. + // Track events timestamps in Chrome have microsecond resolution, while + // system events use nanoseconds. It results in broken event nesting when + // track events and system events share a track. + optional bool disallow_merging_with_system_tracks = 9; } -message TrackEvent { +// End of protos/perfetto/trace/track_event/track_descriptor.proto - enum Type { - TYPE_SLICE_BEGIN = 1; - TYPE_SLICE_END = 2; - } - optional Type type = 9; - optional uint64 track_uuid = 11; +// Design doc: go/trace-ui-state. 
+message UiState { + // The start and end bounds of the viewport of the UI in nanoseconds. + // + // This is the absolute time associated to slices and other events in + // trace processor tables (i.e. the |ts| column of most tables) + optional int64 timeline_start_ts = 1; + optional int64 timeline_end_ts = 2; - oneof name_field { - string name = 23; - } + // Indicates that the given process should be highlighted by the UI. + message HighlightProcess { + oneof selector { + // The pid of the process to highlight. This is useful for UIs to focus + // on tracks of a particular process in the trace. + // + // If more than one process in a trace has the same pid, it is UI + // implementation specific how the process to be focused will be + // chosen. + uint32 pid = 1; + // The command line of the process to highlight; for most Android apps, + // this is the package name of the app. This is useful for UIs to focus + // on a particular app in the trace. + // + // If more than one process has the same cmdline, it is UI implementation + // specific how the process to be focused will be chosen. + string cmdline = 2; + } + } + optional HighlightProcess highlight_process = 3; } +// Next id: 95. message TracePacket { + // The timestamp of the TracePacket. + // By default this timestamp refers to the trace clock (CLOCK_BOOTTIME on + // Android). It can be overridden using a different timestamp_clock_id. + // The clock domain definition in ClockSnapshot can also override: + // - The unit (default: 1ns). + // - The absolute vs delta encoding (default: absolute timestamp). optional uint64 timestamp = 8; + // Specifies the ID of the clock used for the TracePacket |timestamp|. Can be + // one of the built-in types from ClockSnapshot::BuiltinClocks, or a + // producer-defined clock id. + // If unspecified and if no default per-sequence value has been provided via + // TracePacketDefaults, it defaults to BuiltinClocks::BOOTTIME.
+ optional uint32 timestamp_clock_id = 58; + oneof data { + ProcessTree process_tree = 2; + //ProcessStats process_stats = 9; + //InodeFileMap inode_file_map = 4; + //ChromeEventBundle chrome_events = 5; + ClockSnapshot clock_snapshot = 6; + //SysStats sys_stats = 7; TrackEvent track_event = 11; + + // IDs up to 15 are reserved. They take only one byte to encode their + // preamble so should be used for frequent events. + + TraceUuid trace_uuid = 89; + TraceConfig trace_config = 33; + + UiState ui_state = 78; + + + // Only used by TrackEvent. TrackDescriptor track_descriptor = 60; + + // Deprecated, use TrackDescriptor instead. + ProcessDescriptor process_descriptor = 43; + + // Deprecated, use TrackDescriptor instead. + ThreadDescriptor thread_descriptor = 44; + + // Events from the Linux kernel ftrace infrastructure. + //FtraceEventBundle ftrace_events = 1; + + // This field is emitted at periodic intervals (~10s) and + // contains always the binary representation of the UUID + // {82477a76-b28d-42ba-81dc-33326d57a079}. This is used to be able to + // efficiently partition long traces without having to fully parse them. + bytes synchronization_marker = 36; + + // Zero or more proto encoded trace packets compressed using deflate. + // Each compressed_packets TracePacket (including the two field ids and + // sizes) should be less than 512KB. + // bytes compressed_packets = 50; + + //TestEvent for_testing = 900; + // gpu_freq.Packet gpu_freq_packet = 1001; } + // Trusted user id of the producer which generated this packet. Keep in sync + // with TrustedPacket.trusted_uid. + // + // TODO(eseckler): Emit this field in a PacketSequenceDescriptor message + // instead. + oneof optional_trusted_uid { int32 trusted_uid = 3; }; + + // Service-assigned identifier of the packet sequence this packet belongs to. + // Uniquely identifies a producer + writer pair within the tracing session. A + // value of zero denotes an invalid ID. 
Keep in sync with + // TrustedPacket.trusted_packet_sequence_id. oneof optional_trusted_packet_sequence_id { uint32 trusted_packet_sequence_id = 10; } - required TracePacketDefaults trace_packet_defaults = 59; + // Trusted process id of the producer which generated this packet, written by + // the service. + optional int32 trusted_pid = 79; + + // Incrementally emitted interned data, valid only on the packet's sequence + // (packets with the same |trusted_packet_sequence_id|). The writer will + // usually emit new interned data in the same TracePacket that first refers to + // it (since the last reset of interning state). It may also be emitted + // proactively in advance of referring to them in later packets. + // optional InternedData interned_data = 12; + + enum SequenceFlags { + SEQ_UNSPECIFIED = 0; + + // Set by the writer to indicate that it will re-emit any incremental data + // for the packet's sequence before referring to it again. This includes + // interned data as well as periodically emitted data like + // Process/ThreadDescriptors. This flag only affects the current packet + // sequence (see |trusted_packet_sequence_id|). + // + // When set, this TracePacket and subsequent TracePackets on the same + // sequence will not refer to any incremental data emitted before this + // TracePacket. For example, previously emitted interned data will be + // re-emitted if it is referred to again. + // + // When the reader detects packet loss (|previous_packet_dropped|), it needs + // to skip packets in the sequence until the next one with this flag set, to + // ensure intact incremental data. + SEQ_INCREMENTAL_STATE_CLEARED = 1; + + // This packet requires incremental state, such as TracePacketDefaults or + // InternedData, to be parsed correctly. The trace reader should skip this + // packet if incremental state is not valid on this sequence, i.e. 
if no + // packet with the SEQ_INCREMENTAL_STATE_CLEARED flag has been seen on the + // current |trusted_packet_sequence_id|. + SEQ_NEEDS_INCREMENTAL_STATE = 2; + }; + optional uint32 sequence_flags = 13; + + // DEPRECATED. Moved to SequenceFlags::SEQ_INCREMENTAL_STATE_CLEARED. + optional bool incremental_state_cleared = 41; + + // Default values for fields of later TracePackets emitted on this packet's + // sequence (TracePackets with the same |trusted_packet_sequence_id|). + // It must be reemitted when incremental state is cleared (see + // |incremental_state_cleared|). + // Requires that any future packet emitted on the same sequence specifies + // the SEQ_NEEDS_INCREMENTAL_STATE flag. + // TracePacketDefaults always override the global defaults for any future + // packet on this sequence (regardless of SEQ_NEEDS_INCREMENTAL_STATE). + optional TracePacketDefaults trace_packet_defaults = 59; + + // Flag set by the service if, for the current packet sequence (see + // |trusted_packet_sequence_id|), either: + // * this is the first packet, or + // * one or multiple packets were dropped since the last packet that the + // consumer read from the sequence. This can happen if chunks in the trace + // buffer are overridden before the consumer could read them when the trace + // is configured in ring buffer mode. + // + // When packet loss occurs, incrementally emitted data (including interned + // data) on the sequence should be considered invalid up until the next packet + // with SEQ_INCREMENTAL_STATE_CLEARED set. optional bool previous_packet_dropped = 42; + + // Flag set by a producer (starting from SDK v29) if, for the current packet + // sequence (see |trusted_packet_sequence_id|), this is the first packet. + // + // This flag can be used for distinguishing the two situations when + // processing the trace: + // 1. There are no prior events for the sequence because of data loss, e.g. + // due to ring buffer wrapping. + // 2. 
There are no prior events for the sequence because it didn't start + // before this packet (= there's definitely no preceding data loss). + // + // Given that older SDK versions do not support this flag, this flag not + // being present for a particular sequence does not necessarily imply data + // loss. + optional bool first_packet_on_sequence = 87; } +// End of protos/perfetto/trace/trace_packet.proto + +// Begin of protos/perfetto/trace/trace.proto + message Trace { repeated TracePacket packet = 1; + + // Do NOT add any other field here. This is just a convenience wrapper for + // the use case of a trace being saved to a file. There are other cases + // (streaming) where TracePacket are directly streamed without being wrapped + // in a Trace proto. Nothing should ever rely on the full trace, all the + // logic should be based on TracePacket(s). } +// End of protos/perfetto/trace/trace.proto diff --git a/xprof/xprof.sh.erb.in b/xprof/xprof.sh.erb.in index 060758c4..633f6783 100644 --- a/xprof/xprof.sh.erb.in +++ b/xprof/xprof.sh.erb.in @@ -160,6 +160,7 @@ display_help() { echo " -v, --version Print the version string" echo " -r, --replay [path] will be treated as paths to traces folders ($HOME/lttng-traces/...)" echo " If no arguments are provided, will use the latest trace available" + echo " -s, --sample Activate sampling" echo echo " Example:" echo " $(basename $0) ./a.out" @@ -332,6 +333,15 @@ setup_lttng() { export LTTNG_UST_ALLOW_BLOCKING=1 lttngq enable-channel --userspace --blocking-timeout=inf blocking-channel + #Activate sampling on non-blocking stream + if [ $sample == true ]; then + export LTTNG_UST_SAMPLING=1 + lttngq enable-channel --userspace nonblocking-channel + lttngq enable-event --channel=nonblocking-channel --userspace lttng_ust_sampling:* + lttngq enable-event --channel=nonblocking-channel --userspace lttng_ust_ze_sampling:* + lttngq add-context --userspace --channel=nonblocking-channel -t vpid -t vtid + fi + <% if languages.include?("omp")
%> enable_events_omp <% end %> @@ -555,6 +565,7 @@ asm=false replay=false cleanup=false profile=true +sample=false while (( "$#" )); do case "$1" in @@ -567,6 +578,7 @@ while (( "$#" )); do -l | --timeline) shift; mode="timeline" ;; -j | --json) shift; bt_tally_argv+=" --display_mode=json" ;; -m | --tracing-mode) shift; tracing_mode=$1; shift ;; + -s | --sample) shift; sample=true;; --no-profile) shift; profile=false ;; --backend-level) shift; bt_tally_argv+=" --backend_level=$1"; shift ;; --no-save) shift; processing_mode="on-the-fly" ;; diff --git a/ze/Makefile.am b/ze/Makefile.am index 3d752e58..a114df5a 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -89,6 +89,7 @@ ZE_PROBES_INCL = $(ZE_PROBES:=.h) ZE_PROBES_SRC = $(ZE_PROBES:=.c) ZE_STATIC_PROBES = \ + ze_sampling \ ze_profiling \ ze_properties \ ze_build @@ -161,9 +162,9 @@ nodist_libTracerZE_la_SOURCES = \ $(ZE_STATIC_PROBES_INCL) \ tracer_ze.c -libTracerZE_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(srcdir)/include -I../utils -I./ +libTracerZE_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I../utils -I./ libTracerZE_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) -libTracerZE_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) +libTracerZE_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libTracerZE_la_LIBADD = libzetracepoints.la install-exec-hook: diff --git a/ze/gen_ze.rb b/ze/gen_ze.rb index bbcdf56d..e6b273b8 100644 --- a/ze/gen_ze.rb +++ b/ze/gen_ze.rb @@ -24,6 +24,7 @@ #include "zet_structs_tracepoints.h" #include "zes_structs_tracepoints.h" #include "zel_structs_tracepoints.h" +#include "ze_sampling.h" #include "ze_profiling.h" #include "ze_properties.h" #include "ze_build.h" diff --git a/ze/gen_ze_custom_probes.rb b/ze/gen_ze_custom_probes.rb index b57418dc..28441688 100644 --- a/ze/gen_ze_custom_probes.rb +++ 
b/ze/gen_ze_custom_probes.rb @@ -6,7 +6,7 @@ h = YAML::load_file(File.join(SRC_DIR,"ze_events.yaml"))[namespace] -raise "Invalid namespace!" unless h +raise "Invalid namespace: #{namespace}!" unless h puts <= MAX_N_DEVS) { + fprintf(stderr, "ERROR: %d devs are detected, which exceeds MAX_N_DEVS\n", n_devhs); + return -1; + } + + res = ZE_DEVICE_GET_PTR(selected_drvh, &n_devhs, devhs_cache); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZE_DEVICE_GET_PTR", res); + return -1; + } + + // iterate devices to find power domains associated with each device + for (uint32_t i=0; i 0 && npwrdoms <= MAX_N_PDOMS) { + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(smh, &npwrdoms, pwrhs); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + return -1; + } + // printf("%lu \n",(unsigned long)npwrdoms); + for (uint32_t di=0; di < npwrdoms; di++) + { + + mainpwrh_per_dev[(i * npwrdoms) + di] = pwrhs[di]; + } + canreadpwrh_per_dev[i] = 1; + if(0) { + zes_power_energy_counter_t ecounter; + res = ZES_POWER_GET_ENERGY_COUNTER_PTR(mainpwrh_per_dev[i], &ecounter); + if (res != ZE_RESULT_SUCCESS) _ZE_ERROR_MSG_NOTERMINATE("ZES_POWER_GET_ENERGY_COUNTER_PTR", res); + printf("%lu us %lu uj\n", ecounter.timestamp, ecounter.energy); + } + } else { + fprintf(stderr, "Warning: npwrdoms=%d\n", npwrdoms); + } + } + } +//////////////////////////////////////////////////gpu_frequency + // Get the frequency domains + domainCount = 0; + zes_freq_handle_t domains[MAX_N_FDOMS]; + res = zesDeviceEnumFrequencyDomains(devhs_cache[i], &domainCount, NULL); + if (domainCount > 0) { + res = zesDeviceEnumFrequencyDomains(devhs_cache[i], &domainCount, domains); + if (res != ZE_RESULT_SUCCESS) { + printf("Failed to retrieve frequency domains\n"); + return -1; + } + + for (uint32_t k = 0; k < domainCount; ++k) { + zes_freq_properties_t domainProps = { + .stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES, + .pNext = NULL + }; + domainList[(i * domainCount) + k] = domains[k]; + 
zesFrequencyGetProperties(domainList[(i * domainCount) + k], &domainProps); + + zes_freq_state_t state = { + .stype = ZES_STRUCTURE_TYPE_FREQ_STATE, + .pNext = NULL + }; + zesFrequencyGetState(domainList[(i * domainCount) + k], &state); + } + + } + + else { + fprintf(stderr, "Warning: dev%d is not a GPU!\n", i); + } + } + + initialized = 1; + return 0; +} + +///////////////////////////////// + +static void thapi_sampling_energy() { + uint64_t ts_us; + uint64_t energy_uj; + uint32_t frequency; + for (int i=0; i= 1 ? 1 : 0 ); di++) { + zerReadEnergy(i, di, &ts_us, &energy_uj); + do_tracepoint(lttng_ust_ze_sampling, gpu_energy, + (ze_device_handle_t)devhs_cache[i],di, + (uint64_t)energy_uj,ts_us); + } + + for (uint32_t domain=0; domain < (zerGetFDoms() >= 1 ? 1 : 0); domain++) { + //for (uint32_t domain=0; domain < zerGetFDoms(); domain++){ + zerReadFrequency(i,domain, &frequency); + do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)devhs_cache[i], + domain, ts_us, frequency); + } +} +} static void _load_tracer(void) { char *s = NULL; void *handle = NULL; int verbose = 0; + struct timespec interval; + thapi_sampling_init(); s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); if (s) @@ -798,6 +1048,14 @@ static void _load_tracer(void) { else if (verbose) fprintf(stderr, "Warning: LTTNG_UST_ZE_PARANOID_DRIFT not activated without LTTNG_UST_ZE_PROFILE\n"); } + + if (getenv("LTTNG_UST_SAMPLING_ENERGY")) { + zerInit(); + interval.tv_sec = 0; + interval.tv_nsec = 50000000; + thapi_register_sampling(&thapi_sampling_energy, &interval); + } + if (_do_profile) atexit(&_lib_cleanup); } diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index b8019a4d..4d914186 100644 --- a/ze/ze_events.yaml +++ b/ze/ze_events.yaml @@ -1,4 +1,28 @@ --- +lttng_ust_ze_sampling: + events: + - name: gpu_energy + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, domain] + - [ uint64_t, energy ] + - [ uint64_t, timestamp ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, 
"(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, domain, "domain" ] + - [ ctf_integer, uint64_t, energy, "energy" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] + - name: gpu_frequency + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, domain] + - [ uint64_t, timestamp ] + - [ uint64_t, frequency ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, domain, "domain" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] + - [ ctf_integer, uint64_t, frequency, "frequency" ] lttng_ust_ze_profiling: events: - name: event_profiling diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index d32e4ccf..8ca790bd 100644 --- a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -78,6 +78,39 @@ void *init_zeinterval_callbacks_state() { return (void*) s; } +static void create_and_enqueue_power_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, const uint64_t energy, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; + auto [it, inserted] = state->device_energy_ref.insert({{hostname, process_id, hDevice}, {energy, ts}}); + // First entry + if (inserted) + return; + + auto &[prev_energy, prev_ts] = it->second; + + bt_message *message = create_power_message(hostname, process_id, + thread_id, hDevice, domain, + static_cast(((energy-prev_energy) / static_cast(ts-prev_ts))*1000.0), + prev_ts, + zeinterval_iter_g->dispatch->power_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + state->downstream_message_queue.push(message); + prev_energy = energy; + prev_ts = ts; +} + +static void create_and_enqueue_frequency_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t domain, 
const uint64_t ts, const uint64_t frequency) { + bt_message *message = create_frequency_message(hostname, process_id, thread_id, hDevice, domain, ts, frequency, + zeinterval_iter_g->dispatch->frequency_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; + state->downstream_message_queue.push(message); +} + static void create_and_enqueue_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, const uint64_t ts, const uint64_t duration, const bool err) { @@ -246,10 +279,24 @@ static void zeinterval_<%= dbt_event.name %>_callback( |_ (_) (_ (_| | | |_ \/ _|_ | | | (_) / %> - <% if dbt_event.name_unsanitized.start_with?('lttng_ust_ze:') or - dbt_event.name_unsanitized.start_with?('lttng_ust_zet:') or - dbt_event.name_unsanitized.start_with?('lttng_ust_zes:') or - dbt_event.name_unsanitized.start_with?('lttng_ust_zel:') %> + <% if dbt_event.name_unsanitized == "lttng_ust_ze_sampling:gpu_energy" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_power_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, domain, energy, ns_from_origin); + <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:gpu_frequency" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_frequency_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, domain, ns_from_origin, frequency); + <% elsif dbt_event.name_unsanitized.start_with?('lttng_ust_ze:') or + 
dbt_event.name_unsanitized.start_with?('lttng_ust_zet:') or + dbt_event.name_unsanitized.start_with?('lttng_ust_zes:') or + dbt_event.name_unsanitized.start_with?('lttng_ust_zel:') %> const hostname_t hostname = borrow_hostname(bt_evt); const process_id_t process_id = borrow_process_id(bt_evt); diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index 47775510..cbc0027e 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -20,6 +20,7 @@ typedef hp_device_t hpd_t; typedef hp_event_t hpe_t; typedef hp_kernel_t hpk_t; typedef std::tuple clock_lttng_device_t; +typedef std::tuple energy_timestamp_t; typedef std::tuple t_tfnm_m_d_ts_cld_t; typedef std::tuple l_tfnm_m_d_ts_t; @@ -54,6 +55,8 @@ struct zeinterval_callbacks_state { /* Stack to get begin end */ std::unordered_map> last_command; + /*Energy */ + std::unordered_map device_energy_ref; }; template