From 36e185f4fd172c01e5f6db35041382a84bef4f3c Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Fri, 8 Nov 2024 14:35:42 +0000 Subject: [PATCH] sampling_daemon --- xprof/xprof.rb.in | 29 +- ze/Makefile.am | 34 +- ze/sampling_daemon.c | 841 +++++++++++++++++++++++++++++++++ ze/sampling_daemon.h | 7 + ze/tracer_ze_helpers.include.c | 494 +------------------ 5 files changed, 909 insertions(+), 496 deletions(-) create mode 100644 ze/sampling_daemon.c create mode 100644 ze/sampling_daemon.h diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index c2cab278..8dfa3daf 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -7,6 +7,10 @@ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new(THAPI_RUBY_MINIMAL_VERSION) exit(1) end +# Define signals for sampling daemon + + + # We Cannot use "@ .. @" for libdir, bindir, and dataroodir # as they will appear as bash "${exec_prefix}/lib" # So for now we will rely on them having the default value, @@ -239,6 +243,9 @@ def lttng_home_dir File.join('/', 'tmp', "lttng_home--#{mpi_job_id}") end + + + def thapi_trace_dir_root raise unless mpi_master? 
@@ -731,10 +738,28 @@ def gm_rename_folder exec("mv -T #{thapi_trace_dir_tmp_root} #{thapi_trace_dir_root}") unless OPTIONS[:'trace-output'] thapi_trace_dir_root end +# Real-time signal numbers used to talk to the sampling daemon (ze/sampling_daemon.c). +# NOTE(review): the daemon derives these from the C SIGRTMIN macro; confirm that it is 40 on the target platform. +SIGRTMIN = 40 +RT_SIGNAL_SAMPLING_READY = SIGRTMIN +RT_SIGNAL_SAMPLING_FINISH = SIGRTMIN + 1 +# Spawn the sampling daemon, passing it our PID as its only argument, detach it, and return the daemon's PID. +def start_sampling_daemon + puts "Starting sampling daemon (parent PID #{Process.pid})" + sampling_daemon_pid = spawn("sampling_daemon #{Process.pid}") + Process.detach(sampling_daemon_pid) + puts "Started sampling daemon (PID #{sampling_daemon_pid})" + sampling_daemon_pid +end +# Send the FINISH real-time signal to the sampling daemon identified by sampling_daemon_pid. +def stop_sampling_daemon(sampling_daemon_pid) + Process.kill(RT_SIGNAL_SAMPLING_FINISH, sampling_daemon_pid) + puts "Sent FINISH signal to sampling daemon (PID #{sampling_daemon_pid})" +end # Start, Stop lttng, amd do the on-node analsysis def trace_and_on_node_processing(usr_argv) - def teardown_lttng(syncd, pids) + def teardown_lttng(syncd, pids, sampling_daemon_pid = nil) # We need to be sure that all the local ranks are finished syncd.local_barrier('waiting_for_application_ending') @@ -743,6 +768,7 @@ def trace_and_on_node_processing(usr_argv) # for the early exiting ranks return unless mpi_local_master? + stop_sampling_daemon(sampling_daemon_pid) if sampling_daemon_pid # Stop Lttng session and babeltrace daemons lm_lttng_teardown_session if OPTIONS[:archive] @@ -770,6 +796,10 @@ def trace_and_on_node_processing(usr_argv) end syncd.local_barrier('waiting_for_lttng_setup') + if sampling? + # NOTE(review): teardown_lttng only signals the daemon on the local master (it returns early on other ranks); confirm non-master ranks cannot reach this spawn. + sampling_daemon_pid = start_sampling_daemon + end # Launch User Command begin @@ -779,7 +809,7 @@ def trace_and_on_node_processing(usr_argv) raise end - teardown_lttng(syncd, pids) + teardown_lttng(syncd, pids, sampling_daemon_pid) return unless mpi_local_master? 
# Preprocess trace diff --git a/ze/Makefile.am b/ze/Makefile.am index 02d5d7bb..a866613b 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -106,6 +106,7 @@ EXTRA_DIST += \ ze_model.rb \ gen_babeltrace_ze_model.rb + ZE_PROBES = $(ZE_NAMESPACES:=_tracepoints) $(ZE_STRUCTS_NAMESPACES:=_tracepoints) ZE_PROBES_TP = $(ZE_PROBES:=.tp) ZE_PROBES_INCL = $(ZE_PROBES:=.h) @@ -139,6 +140,8 @@ CLEANFILES += \ $(ZE_STATIC_PROBES_INCL) \ $(ZE_STATIC_PROBES_SRC) + + EXTRA_DIST += \ gen_probe_base.rb \ $(ZE_GEN_TRACEPOINTS) \ @@ -148,14 +151,22 @@ BUILT_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) +bin_PROGRAMS = sampling_daemon + +sampling_daemon_SOURCES = sampling_daemon.c +sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/ze/include -I./ +sampling_daemon_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS) +sampling_daemon_LDADD = libzetracepoints.la -ldl -lpthread $(LTTNG_UST_LIBS) ../sampling/libThapiSampling.la + tracer_ze.c: $(srcdir)/gen_ze.rb $(srcdir)/tracer_ze_helpers.include.c $(srcdir)/ze.h.include $(ZE_MODEL) $(ZE_PROBES_INCL) $(ZE_STATIC_PROBES_INCL) SRC_DIR=$(srcdir) $(RUBY) $< > $@ + EXTRA_DIST += \ gen_ze.rb \ tracer_ze_helpers.include.c -CLEANFILES += tracer_ze.c +CLEANFILES += tracer_ze.c sampling_daemon bin_SCRIPTS = \ tracer_ze.sh @@ -172,23 +183,32 @@ libzetracepoints_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/inclu libzetracepoints_la_CFLAGS = -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Wno-sign-compare $(WERROR) $(LTTNG_UST_CFLAGS) libzetracepoints_la_LDFLAGS = $(LTTNG_UST_LIBS) -zedir = $(pkglibdir)/ze -ze_LTLIBRARIES = libze_loader.la - -bt2dir = $(pkglibdir)/bt2 -bt2_LTLIBRARIES = libZEInterval.la +lib_LTLIBRARIES = libze_loader.la libZEInterval.la nodist_libze_loader_la_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) \ tracer_ze.c - + libze_loader_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include 
-I$(top_srcdir)/utils -I./ libze_loader_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) libze_loader_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libze_loader_la_LDFLAGS += -version-info 1:0:0 libze_loader_la_LIBADD = libzetracepoints.la +install-exec-hook: + $(MKDIR_P) $(DESTDIR)$(pkglibdir)/ze + $(LN_S) -f $(DESTDIR)$(libdir)/libze_loader.so.1.0.0 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 + $(LN_S) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so + $(MKDIR_P) $(DESTDIR)$(pkglibdir)/bt2 + $(LN) -f $(DESTDIR)$(libdir)/libZEInterval.so $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so + +uninstall-hook: + $(RM) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so + -rmdir $(DESTDIR)$(pkglibdir)/ze + $(RM) -f $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so + -rmdir $(DESTDIR)$(pkglibdir)/bt2 + tmplibdir = $(libdir)/tmp install-data-hook: diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c new file mode 100644 index 00000000..60fd48b3 --- /dev/null +++ b/ze/sampling_daemon.c @@ -0,0 +1,841 @@ +#include +#include +#include "ze.h.include" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "uthash.h" +#include "utlist.h" + +#include "ze_tracepoints.h" +#include "zet_tracepoints.h" +#include "zes_tracepoints.h" +#include "zel_tracepoints.h" +#include "zex_tracepoints.h" +#include "ze_structs_tracepoints.h" +#include "zet_structs_tracepoints.h" +#include "zes_structs_tracepoints.h" +#include "zel_structs_tracepoints.h" +#include "zex_structs_tracepoints.h" +#include "ze_sampling.h" +#include "ze_profiling.h" +#include "ze_properties.h" +#include "ze_build.h" +#include "sampling_daemon.h" +#include "../sampling/thapi_sampling.h" +#include +#include + +#define RT_SIGNAL_SAMPLING_READY SIGRTMIN +#define RT_SIGNAL_SAMPLING_FINISH SIGRTMIN + 1 + +#define ZES_INIT_PTR zesInit_ptr + 
+#define ZES_DRIVER_GET_PTR zesDriverGet_ptr + +#define ZES_DEVICE_GET_PTR zesDeviceGet_ptr + +#define ZES_DEVICE_GET_PROPERTIES_PTR zesDeviceGetProperties_ptr + + +#define ZES_DEVICE_ENUM_POWER_DOMAINS_PTR zesDeviceEnumPowerDomains_ptr + +#define ZES_POWER_GET_PROPERTIES_PTR zesPowerGetProperties_ptr + +#define ZES_POWER_GET_ENERGY_COUNTER_PTR zesPowerGetEnergyCounter_ptr + + + +#define ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR zesDeviceEnumFrequencyDomains_ptr + +#define ZES_FREQUENCY_GET_PROPERTIES_PTR zesFrequencyGetProperties_ptr + +#define ZES_FREQUENCY_GET_STATE_PTR zesFrequencyGetState_ptr + + +#define ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR zesDeviceEnumEngineGroups_ptr + +#define ZES_ENGINE_GET_PROPERTIES_PTR zesEngineGetProperties_ptr + +#define ZES_ENGINE_GET_ACTIVITY_PTR zesEngineGetActivity_ptr + + + +#define ZES_DEVICE_ENUM_FABRIC_PORTS_PTR zesDeviceEnumFabricPorts_ptr + +#define ZES_FABRIC_PORT_GET_PROPERTIES_PTR zesFabricPortGetProperties_ptr + +#define ZES_FABRIC_PORT_GET_STATE_PTR zesFabricPortGetState_ptr + +#define ZES_FABRIC_PORT_GET_THROUGHPUT_PTR zesFabricPortGetThroughput_ptr + + + +#define ZES_DEVICE_ENUM_MEMORY_MODULES_PTR zesDeviceEnumMemoryModules_ptr + +#define ZES_MEMORY_GET_PROPERTIES_PTR zesMemoryGetProperties_ptr + +#define ZES_MEMORY_GET_STATE_PTR zesMemoryGetState_ptr + +#define ZES_MEMORY_GET_BANDWIDTH_PTR zesMemoryGetBandwidth_ptr + + + +typedef ze_result_t (*zesInit_t)(zes_init_flags_t flags); +static zesInit_t ZES_INIT_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDriverGet_t)(uint32_t *pCount, zes_driver_handle_t *phDrivers); +static zesDriverGet_t ZES_DRIVER_GET_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDeviceGet_t)(zes_driver_handle_t hDriver, uint32_t *pCount, zes_device_handle_t *phDevices); +static zesDeviceGet_t ZES_DEVICE_GET_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDeviceGetProperties_t)(zes_device_handle_t hDevice, zes_device_properties_t *pProperties); +static zesDeviceGetProperties_t 
ZES_DEVICE_GET_PROPERTIES_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumPowerDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_pwr_handle_t *phPower); +static zesDeviceEnumPowerDomains_t ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesPowerGetProperties_t)(zes_pwr_handle_t hPower, zes_power_properties_t *pProperties); +static zesPowerGetProperties_t ZES_POWER_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesPowerGetEnergyCounter_t)(zes_pwr_handle_t hPower, zes_power_energy_counter_t *pEnergy); +static zesPowerGetEnergyCounter_t ZES_POWER_GET_ENERGY_COUNTER_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumFrequencyDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_freq_handle_t *phFrequency); +static zesDeviceEnumFrequencyDomains_t ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFrequencyGetProperties_t)(zes_freq_handle_t hFrequency, zes_freq_properties_t *pProperties); +static zesFrequencyGetProperties_t ZES_FREQUENCY_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFrequencyGetState_t)(zes_freq_handle_t hFrequency, zes_freq_state_t *pState); +static zesFrequencyGetState_t ZES_FREQUENCY_GET_STATE_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumEngineGroups_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_engine_handle_t *phEngine); +static zesDeviceEnumEngineGroups_t ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesEngineGetProperties_t)(zes_engine_handle_t hEngine, zes_engine_properties_t *pProperties); +static zesEngineGetProperties_t ZES_ENGINE_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesEngineGetActivity_t)(zes_engine_handle_t hEngine, zes_engine_stats_t *pStats); +static zesEngineGetActivity_t ZES_ENGINE_GET_ACTIVITY_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumFabricPorts_t)(zes_device_handle_t hDevice, uint32_t *pCount, 
zes_fabric_port_handle_t *phPort); +static zesDeviceEnumFabricPorts_t ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetProperties_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_properties_t *pProperties); +static zesFabricPortGetProperties_t ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetState_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_state_t *pState); +static zesFabricPortGetState_t ZES_FABRIC_PORT_GET_STATE_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetThroughput_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_throughput_t *pThroughput); +static zesFabricPortGetThroughput_t ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (void *) 0x0; + + +typedef ze_result_t (*zesDeviceEnumMemoryModules_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_mem_handle_t *phMemory); +static zesDeviceEnumMemoryModules_t ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetProperties_t)(zes_mem_handle_t hMemory, zes_mem_properties_t *pProperties); +static zesMemoryGetProperties_t ZES_MEMORY_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetState_t)(zes_mem_handle_t hMemory, zes_mem_state_t *pState); +static zesMemoryGetState_t ZES_MEMORY_GET_STATE_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetBandwidth_t)(zes_mem_handle_t hMemory, zes_mem_bandwidth_t *pBandwidth); +static zesMemoryGetBandwidth_t ZES_MEMORY_GET_BANDWIDTH_PTR = (void *) 0x0; + +static void find_ze_symbols(void * handle, int verbose) { + + ZES_INIT_PTR = (zesInit_t)(intptr_t)dlsym(handle, "zesInit"); + if (!ZES_INIT_PTR && verbose) + fprintf(stderr, "Missing symbol zesInit!\n"); + + ZES_DRIVER_GET_PTR = (zesDriverGet_t)(intptr_t)dlsym(handle, "zesDriverGet"); + if (!ZES_DRIVER_GET_PTR && verbose) + fprintf(stderr, "Missing symbol zesDriverGet!\n"); + +ZES_DEVICE_GET_PTR = (zesDeviceGet_t)(intptr_t)dlsym(handle, "zesDeviceGet"); + if 
(!ZES_DEVICE_GET_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceGet!\n"); + + ZES_DEVICE_GET_PROPERTIES_PTR = (zesDeviceGetProperties_t)(intptr_t)dlsym(handle, "zesDeviceGetProperties"); + if (!ZES_DEVICE_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceGetProperties!\n"); + + + +ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (zesDeviceEnumPowerDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumPowerDomains"); + if (!ZES_DEVICE_ENUM_POWER_DOMAINS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumPowerDomains!\n"); + +ZES_POWER_GET_PROPERTIES_PTR = (zesPowerGetProperties_t)(intptr_t)dlsym(handle, "zesPowerGetProperties"); + if (!ZES_POWER_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesPowerGetProperties!\n"); + + ZES_POWER_GET_ENERGY_COUNTER_PTR = (zesPowerGetEnergyCounter_t)(intptr_t)dlsym(handle, "zesPowerGetEnergyCounter"); + if (!ZES_POWER_GET_ENERGY_COUNTER_PTR && verbose) + fprintf(stderr, "Missing symbol zesPowerGetEnergyCounter!\n"); + + + +ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (zesDeviceEnumFrequencyDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumFrequencyDomains"); + if (!ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumFrequencyDomains!\n"); + + ZES_FREQUENCY_GET_PROPERTIES_PTR = (zesFrequencyGetProperties_t)(intptr_t)dlsym(handle, "zesFrequencyGetProperties"); + if (!ZES_FREQUENCY_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesFrequencyGetProperties!\n"); + + ZES_FREQUENCY_GET_STATE_PTR = (zesFrequencyGetState_t)(intptr_t)dlsym(handle, "zesFrequencyGetState"); + if (!ZES_FREQUENCY_GET_STATE_PTR && verbose) + fprintf(stderr, "Missing symbol zesFrequencyGetState!\n"); + + + +ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (zesDeviceEnumEngineGroups_t)(intptr_t)dlsym(handle, "zesDeviceEnumEngineGroups"); + if (!ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumEngineGroups!\n"); + + 
ZES_ENGINE_GET_PROPERTIES_PTR = (zesEngineGetProperties_t)(intptr_t)dlsym(handle, "zesEngineGetProperties"); + if (!ZES_ENGINE_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesEngineGetProperties!\n"); + + ZES_ENGINE_GET_ACTIVITY_PTR = (zesEngineGetActivity_t)(intptr_t)dlsym(handle, "zesEngineGetActivity"); + if (!ZES_ENGINE_GET_ACTIVITY_PTR && verbose) + fprintf(stderr, "Missing symbol zesEngineGetActivity!\n"); + + + +ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (zesDeviceEnumFabricPorts_t)(intptr_t)dlsym(handle, "zesDeviceEnumFabricPorts"); + if (!ZES_DEVICE_ENUM_FABRIC_PORTS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumFabricPorts!\n"); + + ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (zesFabricPortGetProperties_t)(intptr_t)dlsym(handle, "zesFabricPortGetProperties"); + if (!ZES_FABRIC_PORT_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetProperties!\n"); + +ZES_FABRIC_PORT_GET_STATE_PTR = (zesFabricPortGetState_t)(intptr_t)dlsym(handle, "zesFabricPortGetState"); + if (!ZES_FABRIC_PORT_GET_STATE_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetState!\n"); + + ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (zesFabricPortGetThroughput_t)(intptr_t)dlsym(handle, "zesFabricPortGetThroughput"); + if (!ZES_FABRIC_PORT_GET_THROUGHPUT_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetThroughput!\n"); + + + +ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (zesDeviceEnumMemoryModules_t)(intptr_t)dlsym(handle, "zesDeviceEnumMemoryModules"); + if (!ZES_DEVICE_ENUM_MEMORY_MODULES_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumMemoryModules!\n"); + + ZES_MEMORY_GET_PROPERTIES_PTR = (zesMemoryGetProperties_t)(intptr_t)dlsym(handle, "zesMemoryGetProperties"); + if (!ZES_MEMORY_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesMemoryGetProperties!\n"); + + ZES_MEMORY_GET_STATE_PTR = (zesMemoryGetState_t)(intptr_t)dlsym(handle, "zesMemoryGetState"); + if (!ZES_MEMORY_GET_STATE_PTR && 
verbose) + fprintf(stderr, "Missing symbol zesMemoryGetState!\n"); + + ZES_MEMORY_GET_BANDWIDTH_PTR = (zesMemoryGetBandwidth_t)(intptr_t)dlsym(handle, "zesMemoryGetBandwidth"); + if (!ZES_MEMORY_GET_BANDWIDTH_PTR && verbose) + fprintf(stderr, "Missing symbol zesMemoryGetBandwidth!\n"); + +} + +thapi_sampling_handle_t _sampling_handle = NULL; +static int _sampling_freq_initialized = 0; +static int _sampling_fabricPorts_initialized = 0; +static int _sampling_memModules_initialized = 0; +static int _sampling_pwr_initialized = 0; +static int _sampling_engines_initialized = 0; +// Static handles to stay throughout the execution +static zes_driver_handle_t *_sampling_hDrivers = NULL; +static zes_device_handle_t **_sampling_hDevices = NULL; +static zes_freq_handle_t ***_sampling_hFrequencies = NULL; +static zes_pwr_handle_t ***_sampling_hPowers = NULL; +static zes_engine_handle_t ***_sampling_engineHandles = NULL; +static zes_fabric_port_handle_t ***_sampling_hFabricPort = NULL; +static zes_mem_handle_t ***_sampling_hMemModule = NULL; +static uint32_t _sampling_driverCount = 0; +static uint32_t *_sampling_deviceCount = NULL; +static uint32_t **_sampling_freqDomainCounts = NULL; +static uint32_t **_sampling_fabricPortCount = NULL; +static uint32_t **_sampling_memModuleCount = NULL; +static uint32_t **_sampling_powerDomainCounts = NULL; +static uint32_t **_sampling_engineCounts = NULL; + + +//////////////////////////////////////////// +#define _ZE_ERROR_MSG(NAME,RES) do {\ + fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) do {\ + fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ERROR_MSG(MSG) {perror((MSG)) do {\ + {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ +} while (0) + +static void intializeFrequency() { + ze_result_t res; + _sampling_hFrequencies = + (zes_freq_handle_t 
***)calloc(_sampling_driverCount, sizeof(zes_freq_handle_t **)); + _sampling_freqDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_freqDomainCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hFrequencies[driverIdx] = + (zes_freq_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_freq_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get frequency domains for each device + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_freqDomainCounts[driverIdx][deviceIdx], + NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hFrequencies[driverIdx][deviceIdx] = (zes_freq_handle_t *)calloc( + _sampling_freqDomainCounts[driverIdx][deviceIdx], sizeof(zes_freq_handle_t)); + res =ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_freqDomainCounts[driverIdx][deviceIdx], + _sampling_hFrequencies[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; + free(_sampling_hFrequencies[driverIdx][deviceIdx]); + } + for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_freq_properties_t freqProps = {0}; + freqProps.stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; + res =ZES_FREQUENCY_GET_PROPERTIES_PTR( + _sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], &freqProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FREQUENCY_GET_PROPERTIES_PTR", res); + free(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx]); + } + 
do_tracepoint(lttng_ust_ze_sampling, freqProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + &freqProps); + } + } + } + _sampling_freq_initialized = 1; +} + +static void intializePower() { + ze_result_t res; + _sampling_hPowers = + (zes_pwr_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_pwr_handle_t **)); + _sampling_powerDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_hPowers[driverIdx] = + (zes_pwr_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_pwr_handle_t *)); + _sampling_powerDomainCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get power domains for each device + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_powerDomainCounts[driverIdx][deviceIdx], + NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hPowers[driverIdx][deviceIdx] = (zes_pwr_handle_t *)calloc( + _sampling_powerDomainCounts[driverIdx][deviceIdx], sizeof(zes_pwr_handle_t)); + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_powerDomainCounts[driverIdx][deviceIdx], + _sampling_hPowers[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; + free(_sampling_hPowers[driverIdx][deviceIdx]); + } + for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_power_properties_t powerProperties = {0}; + 
powerProperties.stype = ZES_STRUCTURE_TYPE_POWER_PROPERTIES; + res = ZES_POWER_GET_PROPERTIES_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &powerProperties); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_POWER_GET_PROPERTIES_PTR", res); + free(_sampling_hPowers[driverIdx][deviceIdx][domainIdx]); + } + do_tracepoint(lttng_ust_ze_sampling, powerProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &powerProperties); + } + } + } + _sampling_pwr_initialized = 1; +} + +static void intializeEngines() { + ze_result_t res; + _sampling_engineHandles = + (zes_engine_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_engine_handle_t **)); + _sampling_engineCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_engineHandles[driverIdx] = (zes_engine_handle_t **)calloc( + _sampling_deviceCount[driverIdx], sizeof(zes_engine_handle_t *)); + _sampling_engineCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get engine counts for each device + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_engineCounts[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[driverIdx][deviceIdx] == 0) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_engineHandles[driverIdx][deviceIdx] = (zes_engine_handle_t *)calloc( + _sampling_engineCounts[driverIdx][deviceIdx], sizeof(zes_engine_handle_t)); + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_engineCounts[driverIdx][deviceIdx], + 
_sampling_engineHandles[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[driverIdx][deviceIdx] = 0; + free(_sampling_engineHandles[driverIdx][deviceIdx]); + } + for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; + ++engineIdx) { + zes_engine_properties_t engineProps = {0}; + engineProps.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; + res = ZES_ENGINE_GET_PROPERTIES_PTR( + _sampling_engineHandles[driverIdx][deviceIdx][engineIdx], &engineProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, engineProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineProps); + } + } + } + _sampling_engines_initialized = 1; +} + + +static void intializeFabricPorts() { + ze_result_t res; + _sampling_hFabricPort = (zes_fabric_port_handle_t ***)calloc(_sampling_driverCount, + sizeof(zes_fabric_port_handle_t **)); + _sampling_fabricPortCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_fabricPortCount[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hFabricPort[driverIdx] = (zes_fabric_port_handle_t **)calloc( + _sampling_deviceCount[driverIdx], sizeof(zes_fabric_port_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get fabric ports for each device + res = + ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_fabricPortCount[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); + _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; + 
continue; + } + _sampling_hFabricPort[driverIdx][deviceIdx] = (zes_fabric_port_handle_t *)calloc( + _sampling_fabricPortCount[driverIdx][deviceIdx], sizeof(zes_fabric_port_handle_t)); + res = ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_fabricPortCount[driverIdx][deviceIdx], + _sampling_hFabricPort[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); + _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; + free(_sampling_hFabricPort[driverIdx][deviceIdx]); + } + for (uint32_t fabricPortIdx = 0; + fabricPortIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; ++fabricPortIdx) { + + zes_fabric_port_properties_t fabricPortProps = {0}; + res = ZES_FABRIC_PORT_GET_PROPERTIES_PTR( + _sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], &fabricPortProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_PROPERTIES_PTR", res); + } + // Dump fabricPortProperties once + do_tracepoint( + lttng_ust_ze_sampling, fabricPortProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], + &fabricPortProps); + } + } + } + _sampling_fabricPorts_initialized = 1; +} + + +static void intializeMemModules() { + ze_result_t res; + _sampling_hMemModule = + (zes_mem_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_mem_handle_t **)); + _sampling_memModuleCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_memModuleCount[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hMemModule[driverIdx] = + (zes_mem_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_mem_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get fabric 
ports for each device + res = + ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_memModuleCount[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); + _sampling_memModuleCount[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hMemModule[driverIdx][deviceIdx] = (zes_mem_handle_t *)calloc( + _sampling_memModuleCount[driverIdx][deviceIdx], sizeof(zes_mem_handle_t)); + res = ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_memModuleCount[driverIdx][deviceIdx], + _sampling_hMemModule[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); + _sampling_memModuleCount[driverIdx][deviceIdx] = 0; + free(_sampling_hMemModule[driverIdx][deviceIdx]); + } + for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; + ++memModuleIdx) { + zes_mem_properties_t memProps = {0}; + memProps.stype = ZES_STRUCTURE_TYPE_MEM_PROPERTIES; + res = ZES_MEMORY_GET_PROPERTIES_PTR( + _sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], &memProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_PROPERTIES_PTR", res); + } + // Dump fabricPortProperties once + do_tracepoint(lttng_ust_ze_sampling, memoryProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memProps); + } + } + } + _sampling_memModules_initialized = 1; +} + + +static int initializeHandles() { + ze_result_t res; + //find_ze_symbols(handle, NULL); + res = ZES_INIT_PTR(0); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_INIT_PTR", res); + return -1; + } + + // Query driver + _sampling_driverCount = 0; + res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DRIVER_GET_PTR", res); + return 
-1; + } + _sampling_hDrivers = + (zes_driver_handle_t *)calloc(_sampling_driverCount, sizeof(zes_driver_handle_t)); + res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, _sampling_hDrivers); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DRIVER_GET_PTR", res); + return -1; + } + _sampling_deviceCount = (uint32_t *)calloc(_sampling_driverCount, sizeof(uint32_t)); + _sampling_hDevices = + (zes_device_handle_t **)calloc(_sampling_driverCount, sizeof(zes_device_handle_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + res = + ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount[driverIdx] == 0) { + fprintf(stderr, "ERROR: No device found!\n"); + _ZE_ERROR_MSG("1st ZES_DEVICE_GET_PTR", res); + return -1; + } + _sampling_hDevices[driverIdx] = (zes_device_handle_t *)calloc(_sampling_deviceCount[driverIdx], + sizeof(zes_device_handle_t)); + res = ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], + _sampling_hDevices[driverIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_GET_PTR", res); + free(_sampling_hDevices[driverIdx]); + return -1; + } + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + + zes_device_properties_t deviceProps = {0}; + deviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; + deviceProps.pNext = NULL; + res = ZES_DEVICE_GET_PROPERTIES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &deviceProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_DEVICE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, deviceProperties, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, + &deviceProps); + } + } + intializeFrequency(); + intializePower(); + intializeEngines(); + intializeFabricPorts(); + intializeMemModules(); + return 0; +} + +static void readFrequency_dump(uint32_t 
driverIdx, uint32_t deviceIdx) { + if (!_sampling_freq_initialized) + return; + ze_result_t result; + for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_freq_state_t freqState = {0}; + result = ZES_FREQUENCY_GET_STATE_PTR(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + &freqState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FREQUENCY_GET_STATE_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + domainIdx, &freqState); + } +} + +static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_fabricPorts_initialized) + return; + ze_result_t result; + for (uint32_t portIdx = 0; portIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; portIdx++) { + zes_fabric_port_state_t portState = {0}; + portState.pNext = NULL; + portState.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_STATE; + result = ZES_FABRIC_PORT_GET_STATE_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], + &portState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_STATE_PTR", result); + continue; + } + zes_fabric_port_throughput_t throughput = {0}; + result = ZES_FABRIC_PORT_GET_THROUGHPUT_PTR( + _sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &throughput); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_THROUGHPUT_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, fabricPort, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], + &portState, &throughput); + } +} + + +static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_memModules_initialized) + return; + ze_result_t result; + for (uint32_t memModuleIdx = 0; 
memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; + ++memModuleIdx) { + zes_mem_state_t memState = {0}; + memState.stype = ZES_STRUCTURE_TYPE_MEM_STATE; + zes_mem_bandwidth_t memBandwidth = {0}; + result = ZES_MEMORY_GET_STATE_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_STATE_PTR", result); + continue; + } + result = ZES_MEMORY_GET_BANDWIDTH_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memBandwidth); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_BANDWIDTH_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, memStats, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memState, &memBandwidth); + } +} + + +static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_pwr_initialized) + return; + ze_result_t result; + for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_power_energy_counter_t energyCounter = {0}; + result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &energyCounter); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, gpu_energy, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], domainIdx, + &energyCounter); + } +} + +static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_engines_initialized) + return; + ze_result_t result; + for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; + ++engineIdx) { + zes_engine_stats_t engineStats = {0}; + result = 
ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineStats); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, engineStats, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineStats); + } +} + + +static void thapi_sampling_energy() { + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)) { + readFrequency_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)) { + readEnergy_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, engineStats)) { + readEngines_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { + readFabricPorts_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, memStats)) { + readMemModules_dump(driverIdx, deviceIdx); + } + } + } +} + +void process_sampling() { + + struct timespec interval; + interval.tv_sec = 0; + interval.tv_nsec = 50000000; // 50ms interval + thapi_sampling_energy(); + _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); + +} +void cleanup_sampling() { + if (_sampling_handle) { + thapi_unregister_sampling(_sampling_handle); + _sampling_handle = NULL; + } +} + +// Signal handling loop +int signal_loop(int parent_pid) { + // Initialize signal set and add signals + sigset_t signal_set; + sigemptyset(&signal_set); + sigaddset(&signal_set, RT_SIGNAL_SAMPLING_READY); + sigaddset(&signal_set, RT_SIGNAL_SAMPLING_FINISH); + + // Block signals + sigprocmask(SIG_BLOCK, &signal_set, NULL); + + // Signal the parent process READY + kill(parent_pid, 
RT_SIGNAL_SAMPLING_READY); + + // Processing loop: runs until RT_SIGNAL_SAMPLING_FINISH is received + while (1) { + int signum; + if (sigwait(&signal_set, &signum) != 0) + return 1; // sigwait failed: signum was never written, so do not inspect it + if (signum == RT_SIGNAL_SAMPLING_FINISH) { + return 0; + } else { + // Example action when READY signal is received + process_sampling(); + printf("Starting \n"); + kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Signal parent + } + } + + // Unreachable + fprintf(stderr, "Exited signal loop unexpectedly.\n"); + return 1; +} +// Daemon entry point: argv[1] is the PID that receives the READY notifications. +int main(int argc, char **argv) { + if (argc < 2 || atoi(argv[1]) <= 0) { // pid <= 0 would make kill() target process groups + fprintf(stderr, "Usage: %s PARENT_PID\n", argv[0]); + return 1; + } + int parent_pid = atoi(argv[1]); + int verbose = 0; + thapi_sampling_init(); + // Load necessary libraries + void *handle = NULL; + char *s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); + if (s) { + handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } else { + handle = dlopen("libze_loader.so", RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } + + if (!handle) { + fprintf(stderr, "Failure: could not load ze library!\n"); + return 1; + } + + // Initialize daemon + if (getenv("LTTNG_UST_SAMPLING_ENERGY")) { + find_ze_symbols(handle, verbose); + initializeHandles(); + } else { + fprintf(stderr, "Sampling not enabled. 
Exiting.\n"); + dlclose(handle); + return 0; + } + // Run the signal loop + int ret = signal_loop(parent_pid); + // Cleanup before exiting + cleanup_sampling(); + dlclose(handle); + printf("Daemon exiting with status %d\n", ret); + kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Notify parent of clean exit + return ret; +} diff --git a/ze/sampling_daemon.h b/ze/sampling_daemon.h new file mode 100644 index 00000000..4f4cd6ed --- /dev/null +++ b/ze/sampling_daemon.h @@ -0,0 +1,7 @@ +#ifndef SAMPLING_DAEMON_H +#define SAMPLING_DAEMON_H + +void initialize_sampling(); +void cleanup_sampling(); + +#endif // SAMPLING_DAEMON_H \ No newline at end of file diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 8a18f161..ebdc7369 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -1,4 +1,5 @@ -#include "thapi_sampling.h" +//#include "thapi_sampling.h" +//#include "sampling_daemon.h" #ifdef THAPI_DEBUG #define TAHPI_LOG stderr @@ -40,7 +41,7 @@ static int _do_cleanup = 0; static int _do_chained_structs = 0; static int _do_paranoid_drift = 0; static int _do_paranoid_memory_location = 0; -thapi_sampling_handle_t _sampling_handle = NULL; +//thapi_sampling_handle_t _sampling_handle = NULL; pthread_mutex_t ze_closures_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -641,9 +642,7 @@ _lib_cleanup() { if (_do_cleanup) { if (_do_profile) _event_cleanup(); - if (_sampling_handle) - thapi_unregister_sampling(_sampling_handle); - } + } } static void _dump_driver_subdevice_properties(ze_driver_handle_t hDriver, ze_device_handle_t hDevice) { @@ -789,484 +788,14 @@ static inline void _dump_memory_info(ze_command_list_handle_t hCommandList, cons {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ } while (0) -static int _sampling_freq_initialized = 0; -static int _sampling_fabricPorts_initialized = 0; -static int _sampling_memModules_initialized = 0; -static int _sampling_pwr_initialized = 0; -static int 
_sampling_engines_initialized = 0; -// Static handles to stay throughout the execution -static zes_driver_handle_t *_sampling_hDrivers = NULL; -static zes_device_handle_t **_sampling_hDevices = NULL; -static zes_freq_handle_t ***_sampling_hFrequencies = NULL; -static zes_pwr_handle_t ***_sampling_hPowers = NULL; -static zes_engine_handle_t ***_sampling_engineHandles = NULL; -static zes_fabric_port_handle_t ***_sampling_hFabricPort = NULL; -static zes_mem_handle_t ***_sampling_hMemModule = NULL; -static uint32_t _sampling_driverCount = 0; -static uint32_t *_sampling_deviceCount = NULL; -static uint32_t **_sampling_freqDomainCounts = NULL; -static uint32_t **_sampling_fabricPortCount = NULL; -static uint32_t **_sampling_memModuleCount = NULL; -static uint32_t **_sampling_powerDomainCounts = NULL; -static uint32_t **_sampling_engineCounts = NULL; - -static void intializeFrequency() { - ze_result_t res; - _sampling_hFrequencies = - (zes_freq_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_freq_handle_t **)); - _sampling_freqDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_freqDomainCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hFrequencies[driverIdx] = - (zes_freq_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_freq_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get frequency domains for each device - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_freqDomainCounts[driverIdx][deviceIdx], - NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hFrequencies[driverIdx][deviceIdx] = (zes_freq_handle_t *)calloc( - 
_sampling_freqDomainCounts[driverIdx][deviceIdx], sizeof(zes_freq_handle_t)); - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_freqDomainCounts[driverIdx][deviceIdx], - _sampling_hFrequencies[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; - free(_sampling_hFrequencies[driverIdx][deviceIdx]); - } - for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_freq_properties_t freqProps = {0}; - freqProps.stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; - res = ZES_FREQUENCY_GET_PROPERTIES_PTR( - _sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], &freqProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FREQUENCY_GET_PROPERTIES_PTR", res); - free(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx]); - } - do_tracepoint(lttng_ust_ze_sampling, freqProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - &freqProps); - } - } - } - _sampling_freq_initialized = 1; -} - -static void intializePower() { - ze_result_t res; - _sampling_hPowers = - (zes_pwr_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_pwr_handle_t **)); - _sampling_powerDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_hPowers[driverIdx] = - (zes_pwr_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_pwr_handle_t *)); - _sampling_powerDomainCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get power domains for each device - res = 
ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_powerDomainCounts[driverIdx][deviceIdx], - NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hPowers[driverIdx][deviceIdx] = (zes_pwr_handle_t *)calloc( - _sampling_powerDomainCounts[driverIdx][deviceIdx], sizeof(zes_pwr_handle_t)); - res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_powerDomainCounts[driverIdx][deviceIdx], - _sampling_hPowers[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; - free(_sampling_hPowers[driverIdx][deviceIdx]); - } - for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_power_properties_t powerProperties = {0}; - powerProperties.stype = ZES_STRUCTURE_TYPE_POWER_PROPERTIES; - res = ZES_POWER_GET_PROPERTIES_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &powerProperties); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_POWER_GET_PROPERTIES_PTR", res); - free(_sampling_hPowers[driverIdx][deviceIdx][domainIdx]); - } - do_tracepoint(lttng_ust_ze_sampling, powerProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &powerProperties); - } - } - } - _sampling_pwr_initialized = 1; -} - -static void intializeEngines() { - ze_result_t res; - _sampling_engineHandles = - (zes_engine_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_engine_handle_t **)); - _sampling_engineCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_engineHandles[driverIdx] = (zes_engine_handle_t 
**)calloc( - _sampling_deviceCount[driverIdx], sizeof(zes_engine_handle_t *)); - _sampling_engineCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get engine counts for each device - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_engineCounts[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[driverIdx][deviceIdx] == 0) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - _sampling_engineCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_engineHandles[driverIdx][deviceIdx] = (zes_engine_handle_t *)calloc( - _sampling_engineCounts[driverIdx][deviceIdx], sizeof(zes_engine_handle_t)); - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_engineCounts[driverIdx][deviceIdx], - _sampling_engineHandles[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - _sampling_engineCounts[driverIdx][deviceIdx] = 0; - free(_sampling_engineHandles[driverIdx][deviceIdx]); - } - for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; - ++engineIdx) { - zes_engine_properties_t engineProps = {0}; - engineProps.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; - res = ZES_ENGINE_GET_PROPERTIES_PTR( - _sampling_engineHandles[driverIdx][deviceIdx][engineIdx], &engineProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", res); - } - do_tracepoint(lttng_ust_ze_sampling, engineProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineProps); - } - } - } - _sampling_engines_initialized = 1; -} - -static void intializeFabricPorts() { - ze_result_t res; - 
_sampling_hFabricPort = (zes_fabric_port_handle_t ***)calloc(_sampling_driverCount, - sizeof(zes_fabric_port_handle_t **)); - _sampling_fabricPortCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_fabricPortCount[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hFabricPort[driverIdx] = (zes_fabric_port_handle_t **)calloc( - _sampling_deviceCount[driverIdx], sizeof(zes_fabric_port_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get fabric ports for each device - res = - ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_fabricPortCount[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); - _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hFabricPort[driverIdx][deviceIdx] = (zes_fabric_port_handle_t *)calloc( - _sampling_fabricPortCount[driverIdx][deviceIdx], sizeof(zes_fabric_port_handle_t)); - res = ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_fabricPortCount[driverIdx][deviceIdx], - _sampling_hFabricPort[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); - _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; - free(_sampling_hFabricPort[driverIdx][deviceIdx]); - } - for (uint32_t fabricPortIdx = 0; - fabricPortIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; ++fabricPortIdx) { - - zes_fabric_port_properties_t fabricPortProps = {0}; - res = ZES_FABRIC_PORT_GET_PROPERTIES_PTR( - _sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], &fabricPortProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_PROPERTIES_PTR", res); - } - // Dump fabricPortProperties once 
- do_tracepoint( - lttng_ust_ze_sampling, fabricPortProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], - &fabricPortProps); - } - } - } - _sampling_fabricPorts_initialized = 1; -} - -static void intializeMemModules() { - ze_result_t res; - _sampling_hMemModule = - (zes_mem_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_mem_handle_t **)); - _sampling_memModuleCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_memModuleCount[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hMemModule[driverIdx] = - (zes_mem_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_mem_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get fabric ports for each device - res = - ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_memModuleCount[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); - _sampling_memModuleCount[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hMemModule[driverIdx][deviceIdx] = (zes_mem_handle_t *)calloc( - _sampling_memModuleCount[driverIdx][deviceIdx], sizeof(zes_mem_handle_t)); - res = ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_memModuleCount[driverIdx][deviceIdx], - _sampling_hMemModule[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); - _sampling_memModuleCount[driverIdx][deviceIdx] = 0; - free(_sampling_hMemModule[driverIdx][deviceIdx]); - } - for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; - ++memModuleIdx) { - 
zes_mem_properties_t memProps = {0}; - memProps.stype = ZES_STRUCTURE_TYPE_MEM_PROPERTIES; - res = ZES_MEMORY_GET_PROPERTIES_PTR( - _sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], &memProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_PROPERTIES_PTR", res); - } - // Dump fabricPortProperties once - do_tracepoint(lttng_ust_ze_sampling, memoryProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memProps); - } - } - } - _sampling_memModules_initialized = 1; -} - -static int initializeHandles() { - ze_result_t res; - res = ZES_INIT_PTR(0); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_INIT_PTR", res); - return -1; - } - - // Query driver - _sampling_driverCount = 0; - res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DRIVER_GET_PTR", res); - return -1; - } - _sampling_hDrivers = - (zes_driver_handle_t *)calloc(_sampling_driverCount, sizeof(zes_driver_handle_t)); - res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, _sampling_hDrivers); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DRIVER_GET_PTR", res); - return -1; - } - _sampling_deviceCount = (uint32_t *)calloc(_sampling_driverCount, sizeof(uint32_t)); - _sampling_hDevices = - (zes_device_handle_t **)calloc(_sampling_driverCount, sizeof(zes_device_handle_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - res = - ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], NULL); - if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount[driverIdx] == 0) { - fprintf(stderr, "ERROR: No device found!\n"); - _ZE_ERROR_MSG("1st ZES_DEVICE_GET_PTR", res); - return -1; - } - _sampling_hDevices[driverIdx] = (zes_device_handle_t *)calloc(_sampling_deviceCount[driverIdx], - sizeof(zes_device_handle_t)); - res = 
ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], - _sampling_hDevices[driverIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_GET_PTR", res); - free(_sampling_hDevices[driverIdx]); - return -1; - } - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - - zes_device_properties_t deviceProps = {0}; - deviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; - deviceProps.pNext = NULL; - res = ZES_DEVICE_GET_PROPERTIES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &deviceProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_DEVICE_GET_PROPERTIES_PTR", res); - } - do_tracepoint(lttng_ust_ze_sampling, deviceProperties, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, - &deviceProps); - } - } - intializeFrequency(); - intializePower(); - intializeEngines(); - intializeFabricPorts(); - intializeMemModules(); - return 0; -} - -static void readFrequency_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_freq_initialized) - return; - ze_result_t result; - for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_freq_state_t freqState = {0}; - result = ZES_FREQUENCY_GET_STATE_PTR(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - &freqState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FREQUENCY_GET_STATE_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - domainIdx, &freqState); - } -} - -static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_fabricPorts_initialized) - return; - ze_result_t result; - for (uint32_t portIdx = 0; portIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; portIdx++) { - zes_fabric_port_state_t portState = {0}; - 
portState.pNext = NULL; - portState.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_STATE; - result = ZES_FABRIC_PORT_GET_STATE_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], - &portState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_STATE_PTR", result); - continue; - } - zes_fabric_port_throughput_t throughput = {0}; - result = ZES_FABRIC_PORT_GET_THROUGHPUT_PTR( - _sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &throughput); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_THROUGHPUT_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, fabricPort, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], - &portState, &throughput); - } -} -static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_memModules_initialized) - return; - ze_result_t result; - for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; - ++memModuleIdx) { - zes_mem_state_t memState = {0}; - memState.stype = ZES_STRUCTURE_TYPE_MEM_STATE; - zes_mem_bandwidth_t memBandwidth = {0}; - result = ZES_MEMORY_GET_STATE_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_STATE_PTR", result); - continue; - } - result = ZES_MEMORY_GET_BANDWIDTH_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memBandwidth); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_BANDWIDTH_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, memStats, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memState, &memBandwidth); - } -} - -static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_pwr_initialized) - return; - 
ze_result_t result; - for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_power_energy_counter_t energyCounter = {0}; - result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &energyCounter); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, gpu_energy, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], domainIdx, - &energyCounter); - } -} - -static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_engines_initialized) - return; - ze_result_t result; - for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; - ++engineIdx) { - zes_engine_stats_t engineStats = {0}; - result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineStats); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, engineStats, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineStats); - } -} - -static void thapi_sampling_energy() { - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)) { - readFrequency_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)) { - readEnergy_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, engineStats)) { - readEngines_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { - 
readFabricPorts_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, memStats)) { - readMemModules_dump(driverIdx, deviceIdx); - } - } - } -} static void _load_tracer(void) { char *s = NULL; void *handle = NULL; int verbose = 0; - struct timespec interval; - thapi_sampling_init(); + //struct timespec interval; + //thapi_sampling_init(); s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); if (s) @@ -1318,17 +847,8 @@ static void _load_tracer(void) { if (s) _do_paranoid_memory_location = 1; - s = getenv("LTTNG_UST_SAMPLING_ENERGY"); - if (s) { - initializeHandles(); - /* TODO: make it configurable */ - interval.tv_sec = 0; - interval.tv_nsec = 50000000; - thapi_sampling_energy(); - _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); - } - _do_cleanup = 1; + #ifndef THAPI_USE_DESTRUCTORS atexit(_lib_cleanup); #endif