From 36e185f4fd172c01e5f6db35041382a84bef4f3c Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Fri, 8 Nov 2024 14:35:42 +0000 Subject: [PATCH 1/8] sampling_daemon --- xprof/xprof.rb.in | 29 +- ze/Makefile.am | 34 +- ze/sampling_daemon.c | 841 +++++++++++++++++++++++++++++++++ ze/sampling_daemon.h | 7 + ze/tracer_ze_helpers.include.c | 494 +------------------ 5 files changed, 909 insertions(+), 496 deletions(-) create mode 100644 ze/sampling_daemon.c create mode 100644 ze/sampling_daemon.h diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index c2cab278..8dfa3daf 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -7,6 +7,10 @@ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new(THAPI_RUBY_MINIMAL_VERSION) exit(1) end +# Define signals for sampling daemon + + + # We Cannot use "@ .. @" for libdir, bindir, and dataroodir # as they will appear as bash "${exec_prefix}/lib" # So for now we will rely on them having the default value, @@ -239,6 +243,9 @@ def lttng_home_dir File.join('/', 'tmp', "lttng_home--#{mpi_job_id}") end + + + def thapi_trace_dir_root raise unless mpi_master? 
@@ -731,10 +738,24 @@ def gm_rename_folder exec("mv -T #{thapi_trace_dir_tmp_root} #{thapi_trace_dir_root}") unless OPTIONS[:'trace-output'] thapi_trace_dir_root end +SIGRTMIN = 40 # NOTE(review): hard-coded; confirm it equals SIGRTMIN as seen by the C sampling daemon +RT_SIGNAL_SAMPLING_READY = SIGRTMIN +RT_SIGNAL_SAMPLING_FINISH = SIGRTMIN + 1 +def start_sampling_daemon + puts "Starting sampling daemon (parent PID #{Process.pid})" + sampling_daemon_pid = spawn("sampling_daemon #{Process.pid}") + Process.detach(sampling_daemon_pid) + puts "Started sampling daemon (PID #{sampling_daemon_pid})" + sampling_daemon_pid +end +def stop_sampling_daemon(sampling_daemon_pid) + Process.kill(RT_SIGNAL_SAMPLING_FINISH, sampling_daemon_pid) + puts "Sent FINISH signal to sampling daemon (PID #{sampling_daemon_pid})" +end # Start, Stop lttng, amd do the on-node analsysis def trace_and_on_node_processing(usr_argv) - def teardown_lttng(syncd, pids) + def teardown_lttng(syncd, pids, sampling_daemon_pid = nil) # We need to be sure that all the local ranks are finished syncd.local_barrier('waiting_for_application_ending') @@ -743,6 +764,7 @@ def trace_and_on_node_processing(usr_argv) # for the early exiting ranks return unless mpi_local_master? + stop_sampling_daemon(sampling_daemon_pid) if sampling_daemon_pid # Stop Lttng session and babeltrace daemons lm_lttng_teardown_session if OPTIONS[:archive] @@ -770,6 +792,9 @@ def trace_and_on_node_processing(usr_argv) end syncd.local_barrier('waiting_for_lttng_setup') + if sampling? + sampling_daemon_pid = start_sampling_daemon + end # Launch User Command begin @@ -779,7 +804,7 @@ def trace_and_on_node_processing(usr_argv) raise end - teardown_lttng(syncd, pids) + teardown_lttng(syncd, pids, sampling_daemon_pid) return unless mpi_local_master? 
# Preprocess trace diff --git a/ze/Makefile.am b/ze/Makefile.am index 02d5d7bb..a866613b 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -106,6 +106,7 @@ EXTRA_DIST += \ ze_model.rb \ gen_babeltrace_ze_model.rb + ZE_PROBES = $(ZE_NAMESPACES:=_tracepoints) $(ZE_STRUCTS_NAMESPACES:=_tracepoints) ZE_PROBES_TP = $(ZE_PROBES:=.tp) ZE_PROBES_INCL = $(ZE_PROBES:=.h) @@ -139,6 +140,8 @@ CLEANFILES += \ $(ZE_STATIC_PROBES_INCL) \ $(ZE_STATIC_PROBES_SRC) + + EXTRA_DIST += \ gen_probe_base.rb \ $(ZE_GEN_TRACEPOINTS) \ @@ -148,14 +151,22 @@ BUILT_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) +bin_PROGRAMS = sampling_daemon + +sampling_daemon_SOURCES = sampling_daemon.c +sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/ze/include -I./ +sampling_daemon_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS) +sampling_daemon_LDADD = libzetracepoints.la -ldl -lpthread $(LTTNG_UST_LIBS) ../sampling/libThapiSampling.la + tracer_ze.c: $(srcdir)/gen_ze.rb $(srcdir)/tracer_ze_helpers.include.c $(srcdir)/ze.h.include $(ZE_MODEL) $(ZE_PROBES_INCL) $(ZE_STATIC_PROBES_INCL) SRC_DIR=$(srcdir) $(RUBY) $< > $@ + EXTRA_DIST += \ gen_ze.rb \ tracer_ze_helpers.include.c -CLEANFILES += tracer_ze.c +CLEANFILES += tracer_ze.c sampling_daemon bin_SCRIPTS = \ tracer_ze.sh @@ -172,23 +183,32 @@ libzetracepoints_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/inclu libzetracepoints_la_CFLAGS = -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Wno-sign-compare $(WERROR) $(LTTNG_UST_CFLAGS) libzetracepoints_la_LDFLAGS = $(LTTNG_UST_LIBS) -zedir = $(pkglibdir)/ze -ze_LTLIBRARIES = libze_loader.la - -bt2dir = $(pkglibdir)/bt2 -bt2_LTLIBRARIES = libZEInterval.la +lib_LTLIBRARIES = libze_loader.la libZEInterval.la nodist_libze_loader_la_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) \ tracer_ze.c - + libze_loader_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include 
-I$(top_srcdir)/utils -I./ libze_loader_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) libze_loader_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libze_loader_la_LDFLAGS += -version-info 1:0:0 libze_loader_la_LIBADD = libzetracepoints.la +install-exec-hook: + $(MKDIR_P) $(DESTDIR)$(pkglibdir)/ze + $(LN_S) -f $(DESTDIR)$(libdir)/libze_loader.so.1.0.0 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 + $(LN_S) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so + $(MKDIR_P) $(DESTDIR)$(pkglibdir)/bt2 + $(LN_S) -f $(DESTDIR)$(libdir)/libZEInterval.so $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so + +uninstall-hook: + $(RM) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so + -rmdir $(DESTDIR)$(pkglibdir)/ze + $(RM) -f $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so + -rmdir $(DESTDIR)$(pkglibdir)/bt2 + tmplibdir = $(libdir)/tmp install-data-hook: diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c new file mode 100644 index 00000000..60fd48b3 --- /dev/null +++ b/ze/sampling_daemon.c @@ -0,0 +1,841 @@ +#include +#include +#include "ze.h.include" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "uthash.h" +#include "utlist.h" + +#include "ze_tracepoints.h" +#include "zet_tracepoints.h" +#include "zes_tracepoints.h" +#include "zel_tracepoints.h" +#include "zex_tracepoints.h" +#include "ze_structs_tracepoints.h" +#include "zet_structs_tracepoints.h" +#include "zes_structs_tracepoints.h" +#include "zel_structs_tracepoints.h" +#include "zex_structs_tracepoints.h" +#include "ze_sampling.h" +#include "ze_profiling.h" +#include "ze_properties.h" +#include "ze_build.h" +#include "sampling_daemon.h" +#include "../sampling/thapi_sampling.h" +#include +#include + +#define RT_SIGNAL_SAMPLING_READY SIGRTMIN +#define RT_SIGNAL_SAMPLING_FINISH (SIGRTMIN + 1) + +#define ZES_INIT_PTR zesInit_ptr + 
+#define ZES_DRIVER_GET_PTR zesDriverGet_ptr + +#define ZES_DEVICE_GET_PTR zesDeviceGet_ptr + +#define ZES_DEVICE_GET_PROPERTIES_PTR zesDeviceGetProperties_ptr + + +#define ZES_DEVICE_ENUM_POWER_DOMAINS_PTR zesDeviceEnumPowerDomains_ptr + +#define ZES_POWER_GET_PROPERTIES_PTR zesPowerGetProperties_ptr + +#define ZES_POWER_GET_ENERGY_COUNTER_PTR zesPowerGetEnergyCounter_ptr + + + +#define ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR zesDeviceEnumFrequencyDomains_ptr + +#define ZES_FREQUENCY_GET_PROPERTIES_PTR zesFrequencyGetProperties_ptr + +#define ZES_FREQUENCY_GET_STATE_PTR zesFrequencyGetState_ptr + + +#define ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR zesDeviceEnumEngineGroups_ptr + +#define ZES_ENGINE_GET_PROPERTIES_PTR zesEngineGetProperties_ptr + +#define ZES_ENGINE_GET_ACTIVITY_PTR zesEngineGetActivity_ptr + + + +#define ZES_DEVICE_ENUM_FABRIC_PORTS_PTR zesDeviceEnumFabricPorts_ptr + +#define ZES_FABRIC_PORT_GET_PROPERTIES_PTR zesFabricPortGetProperties_ptr + +#define ZES_FABRIC_PORT_GET_STATE_PTR zesFabricPortGetState_ptr + +#define ZES_FABRIC_PORT_GET_THROUGHPUT_PTR zesFabricPortGetThroughput_ptr + + + +#define ZES_DEVICE_ENUM_MEMORY_MODULES_PTR zesDeviceEnumMemoryModules_ptr + +#define ZES_MEMORY_GET_PROPERTIES_PTR zesMemoryGetProperties_ptr + +#define ZES_MEMORY_GET_STATE_PTR zesMemoryGetState_ptr + +#define ZES_MEMORY_GET_BANDWIDTH_PTR zesMemoryGetBandwidth_ptr + + + +typedef ze_result_t (*zesInit_t)(zes_init_flags_t flags); +static zesInit_t ZES_INIT_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDriverGet_t)(uint32_t *pCount, zes_driver_handle_t *phDrivers); +static zesDriverGet_t ZES_DRIVER_GET_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDeviceGet_t)(zes_driver_handle_t hDriver, uint32_t *pCount, zes_device_handle_t *phDevices); +static zesDeviceGet_t ZES_DEVICE_GET_PTR = (void *) 0x0; + +typedef ze_result_t (*zesDeviceGetProperties_t)(zes_device_handle_t hDevice, zes_device_properties_t *pProperties); +static zesDeviceGetProperties_t 
ZES_DEVICE_GET_PROPERTIES_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumPowerDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_pwr_handle_t *phPower); +static zesDeviceEnumPowerDomains_t ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesPowerGetProperties_t)(zes_pwr_handle_t hPower, zes_power_properties_t *pProperties); +static zesPowerGetProperties_t ZES_POWER_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesPowerGetEnergyCounter_t)(zes_pwr_handle_t hPower, zes_power_energy_counter_t *pEnergy); +static zesPowerGetEnergyCounter_t ZES_POWER_GET_ENERGY_COUNTER_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumFrequencyDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_freq_handle_t *phFrequency); +static zesDeviceEnumFrequencyDomains_t ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFrequencyGetProperties_t)(zes_freq_handle_t hFrequency, zes_freq_properties_t *pProperties); +static zesFrequencyGetProperties_t ZES_FREQUENCY_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFrequencyGetState_t)(zes_freq_handle_t hFrequency, zes_freq_state_t *pState); +static zesFrequencyGetState_t ZES_FREQUENCY_GET_STATE_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumEngineGroups_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_engine_handle_t *phEngine); +static zesDeviceEnumEngineGroups_t ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesEngineGetProperties_t)(zes_engine_handle_t hEngine, zes_engine_properties_t *pProperties); +static zesEngineGetProperties_t ZES_ENGINE_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesEngineGetActivity_t)(zes_engine_handle_t hEngine, zes_engine_stats_t *pStats); +static zesEngineGetActivity_t ZES_ENGINE_GET_ACTIVITY_PTR = (void *) 0x0; + + + +typedef ze_result_t (*zesDeviceEnumFabricPorts_t)(zes_device_handle_t hDevice, uint32_t *pCount, 
zes_fabric_port_handle_t *phPort); +static zesDeviceEnumFabricPorts_t ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetProperties_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_properties_t *pProperties); +static zesFabricPortGetProperties_t ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetState_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_state_t *pState); +static zesFabricPortGetState_t ZES_FABRIC_PORT_GET_STATE_PTR = (void *) 0x0; + +typedef ze_result_t (*zesFabricPortGetThroughput_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_throughput_t *pThroughput); +static zesFabricPortGetThroughput_t ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (void *) 0x0; + + +typedef ze_result_t (*zesDeviceEnumMemoryModules_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_mem_handle_t *phMemory); +static zesDeviceEnumMemoryModules_t ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetProperties_t)(zes_mem_handle_t hMemory, zes_mem_properties_t *pProperties); +static zesMemoryGetProperties_t ZES_MEMORY_GET_PROPERTIES_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetState_t)(zes_mem_handle_t hMemory, zes_mem_state_t *pState); +static zesMemoryGetState_t ZES_MEMORY_GET_STATE_PTR = (void *) 0x0; + +typedef ze_result_t (*zesMemoryGetBandwidth_t)(zes_mem_handle_t hMemory, zes_mem_bandwidth_t *pBandwidth); +static zesMemoryGetBandwidth_t ZES_MEMORY_GET_BANDWIDTH_PTR = (void *) 0x0; + +static void find_ze_symbols(void * handle, int verbose) { + + ZES_INIT_PTR = (zesInit_t)(intptr_t)dlsym(handle, "zesInit"); + if (!ZES_INIT_PTR && verbose) + fprintf(stderr, "Missing symbol zesInit!\n"); + + ZES_DRIVER_GET_PTR = (zesDriverGet_t)(intptr_t)dlsym(handle, "zesDriverGet"); + if (!ZES_DRIVER_GET_PTR && verbose) + fprintf(stderr, "Missing symbol zesDriverGet!\n"); + +ZES_DEVICE_GET_PTR = (zesDeviceGet_t)(intptr_t)dlsym(handle, "zesDeviceGet"); + if 
(!ZES_DEVICE_GET_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceGet!\n"); + + ZES_DEVICE_GET_PROPERTIES_PTR = (zesDeviceGetProperties_t)(intptr_t)dlsym(handle, "zesDeviceGetProperties"); + if (!ZES_DEVICE_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceGetProperties!\n"); + + + +ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (zesDeviceEnumPowerDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumPowerDomains"); + if (!ZES_DEVICE_ENUM_POWER_DOMAINS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumPowerDomains!\n"); + +ZES_POWER_GET_PROPERTIES_PTR = (zesPowerGetProperties_t)(intptr_t)dlsym(handle, "zesPowerGetProperties"); + if (!ZES_POWER_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesPowerGetProperties!\n"); + + ZES_POWER_GET_ENERGY_COUNTER_PTR = (zesPowerGetEnergyCounter_t)(intptr_t)dlsym(handle, "zesPowerGetEnergyCounter"); + if (!ZES_POWER_GET_ENERGY_COUNTER_PTR && verbose) + fprintf(stderr, "Missing symbol zesPowerGetEnergyCounter!\n"); + + + +ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (zesDeviceEnumFrequencyDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumFrequencyDomains"); + if (!ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumFrequencyDomains!\n"); + + ZES_FREQUENCY_GET_PROPERTIES_PTR = (zesFrequencyGetProperties_t)(intptr_t)dlsym(handle, "zesFrequencyGetProperties"); + if (!ZES_FREQUENCY_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesFrequencyGetProperties!\n"); + + ZES_FREQUENCY_GET_STATE_PTR = (zesFrequencyGetState_t)(intptr_t)dlsym(handle, "zesFrequencyGetState"); + if (!ZES_FREQUENCY_GET_STATE_PTR && verbose) + fprintf(stderr, "Missing symbol zesFrequencyGetState!\n"); + + + +ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (zesDeviceEnumEngineGroups_t)(intptr_t)dlsym(handle, "zesDeviceEnumEngineGroups"); + if (!ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumEngineGroups!\n"); + + 
ZES_ENGINE_GET_PROPERTIES_PTR = (zesEngineGetProperties_t)(intptr_t)dlsym(handle, "zesEngineGetProperties"); + if (!ZES_ENGINE_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesEngineGetProperties!\n"); + + ZES_ENGINE_GET_ACTIVITY_PTR = (zesEngineGetActivity_t)(intptr_t)dlsym(handle, "zesEngineGetActivity"); + if (!ZES_ENGINE_GET_ACTIVITY_PTR && verbose) + fprintf(stderr, "Missing symbol zesEngineGetActivity!\n"); + + + +ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (zesDeviceEnumFabricPorts_t)(intptr_t)dlsym(handle, "zesDeviceEnumFabricPorts"); + if (!ZES_DEVICE_ENUM_FABRIC_PORTS_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumFabricPorts!\n"); + + ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (zesFabricPortGetProperties_t)(intptr_t)dlsym(handle, "zesFabricPortGetProperties"); + if (!ZES_FABRIC_PORT_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetProperties!\n"); + +ZES_FABRIC_PORT_GET_STATE_PTR = (zesFabricPortGetState_t)(intptr_t)dlsym(handle, "zesFabricPortGetState"); + if (!ZES_FABRIC_PORT_GET_STATE_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetState!\n"); + + ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (zesFabricPortGetThroughput_t)(intptr_t)dlsym(handle, "zesFabricPortGetThroughput"); + if (!ZES_FABRIC_PORT_GET_THROUGHPUT_PTR && verbose) + fprintf(stderr, "Missing symbol zesFabricPortGetThroughput!\n"); + + + +ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (zesDeviceEnumMemoryModules_t)(intptr_t)dlsym(handle, "zesDeviceEnumMemoryModules"); + if (!ZES_DEVICE_ENUM_MEMORY_MODULES_PTR && verbose) + fprintf(stderr, "Missing symbol zesDeviceEnumMemoryModules!\n"); + + ZES_MEMORY_GET_PROPERTIES_PTR = (zesMemoryGetProperties_t)(intptr_t)dlsym(handle, "zesMemoryGetProperties"); + if (!ZES_MEMORY_GET_PROPERTIES_PTR && verbose) + fprintf(stderr, "Missing symbol zesMemoryGetProperties!\n"); + + ZES_MEMORY_GET_STATE_PTR = (zesMemoryGetState_t)(intptr_t)dlsym(handle, "zesMemoryGetState"); + if (!ZES_MEMORY_GET_STATE_PTR && 
verbose) + fprintf(stderr, "Missing symbol zesMemoryGetState!\n"); + + ZES_MEMORY_GET_BANDWIDTH_PTR = (zesMemoryGetBandwidth_t)(intptr_t)dlsym(handle, "zesMemoryGetBandwidth"); + if (!ZES_MEMORY_GET_BANDWIDTH_PTR && verbose) + fprintf(stderr, "Missing symbol zesMemoryGetBandwidth!\n"); + +} + +thapi_sampling_handle_t _sampling_handle = NULL; +static int _sampling_freq_initialized = 0; +static int _sampling_fabricPorts_initialized = 0; +static int _sampling_memModules_initialized = 0; +static int _sampling_pwr_initialized = 0; +static int _sampling_engines_initialized = 0; +// Static handles to stay throughout the execution +static zes_driver_handle_t *_sampling_hDrivers = NULL; +static zes_device_handle_t **_sampling_hDevices = NULL; +static zes_freq_handle_t ***_sampling_hFrequencies = NULL; +static zes_pwr_handle_t ***_sampling_hPowers = NULL; +static zes_engine_handle_t ***_sampling_engineHandles = NULL; +static zes_fabric_port_handle_t ***_sampling_hFabricPort = NULL; +static zes_mem_handle_t ***_sampling_hMemModule = NULL; +static uint32_t _sampling_driverCount = 0; +static uint32_t *_sampling_deviceCount = NULL; +static uint32_t **_sampling_freqDomainCounts = NULL; +static uint32_t **_sampling_fabricPortCount = NULL; +static uint32_t **_sampling_memModuleCount = NULL; +static uint32_t **_sampling_powerDomainCounts = NULL; +static uint32_t **_sampling_engineCounts = NULL; + + +//////////////////////////////////////////// +#define _ZE_ERROR_MSG(NAME,RES) do {\ + fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) do {\ + fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ERROR_MSG(MSG) do {\ + perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ +} while (0) + +static void intializeFrequency() { + ze_result_t res; + _sampling_hFrequencies = + (zes_freq_handle_t 
***)calloc(_sampling_driverCount, sizeof(zes_freq_handle_t **)); + _sampling_freqDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_freqDomainCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hFrequencies[driverIdx] = + (zes_freq_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_freq_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get frequency domains for each device + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_freqDomainCounts[driverIdx][deviceIdx], + NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hFrequencies[driverIdx][deviceIdx] = (zes_freq_handle_t *)calloc( + _sampling_freqDomainCounts[driverIdx][deviceIdx], sizeof(zes_freq_handle_t)); + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_freqDomainCounts[driverIdx][deviceIdx], + _sampling_hFrequencies[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; + free(_sampling_hFrequencies[driverIdx][deviceIdx]); + } + for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_freq_properties_t freqProps = {0}; + freqProps.stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; + res = ZES_FREQUENCY_GET_PROPERTIES_PTR( + _sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], &freqProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FREQUENCY_GET_PROPERTIES_PTR", res); + /* the handle was filled in by the enum call, not allocated here: nothing to free */ + } + 
do_tracepoint(lttng_ust_ze_sampling, freqProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + &freqProps); + } + } + } + _sampling_freq_initialized = 1; +} + +static void intializePower() { + ze_result_t res; + _sampling_hPowers = + (zes_pwr_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_pwr_handle_t **)); + _sampling_powerDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_hPowers[driverIdx] = + (zes_pwr_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_pwr_handle_t *)); + _sampling_powerDomainCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get power domains for each device + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_powerDomainCounts[driverIdx][deviceIdx], + NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hPowers[driverIdx][deviceIdx] = (zes_pwr_handle_t *)calloc( + _sampling_powerDomainCounts[driverIdx][deviceIdx], sizeof(zes_pwr_handle_t)); + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_powerDomainCounts[driverIdx][deviceIdx], + _sampling_hPowers[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; + free(_sampling_hPowers[driverIdx][deviceIdx]); + } + for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_power_properties_t powerProperties = {0}; + 
powerProperties.stype = ZES_STRUCTURE_TYPE_POWER_PROPERTIES; + res = ZES_POWER_GET_PROPERTIES_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &powerProperties); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_POWER_GET_PROPERTIES_PTR", res); + /* the handle was filled in by the enum call, not allocated here: nothing to free */ + } + do_tracepoint(lttng_ust_ze_sampling, powerProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &powerProperties); + } + } + } + _sampling_pwr_initialized = 1; +} + +static void intializeEngines() { + ze_result_t res; + _sampling_engineHandles = + (zes_engine_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_engine_handle_t **)); + _sampling_engineCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_engineHandles[driverIdx] = (zes_engine_handle_t **)calloc( + _sampling_deviceCount[driverIdx], sizeof(zes_engine_handle_t *)); + _sampling_engineCounts[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get engine counts for each device + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_engineCounts[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[driverIdx][deviceIdx] == 0) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_engineHandles[driverIdx][deviceIdx] = (zes_engine_handle_t *)calloc( + _sampling_engineCounts[driverIdx][deviceIdx], sizeof(zes_engine_handle_t)); + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_engineCounts[driverIdx][deviceIdx], + 
_sampling_engineHandles[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[driverIdx][deviceIdx] = 0; + free(_sampling_engineHandles[driverIdx][deviceIdx]); + } + for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; + ++engineIdx) { + zes_engine_properties_t engineProps = {0}; + engineProps.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; + res = ZES_ENGINE_GET_PROPERTIES_PTR( + _sampling_engineHandles[driverIdx][deviceIdx][engineIdx], &engineProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, engineProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineProps); + } + } + } + _sampling_engines_initialized = 1; +} + + +static void intializeFabricPorts() { + ze_result_t res; + _sampling_hFabricPort = (zes_fabric_port_handle_t ***)calloc(_sampling_driverCount, + sizeof(zes_fabric_port_handle_t **)); + _sampling_fabricPortCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_fabricPortCount[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hFabricPort[driverIdx] = (zes_fabric_port_handle_t **)calloc( + _sampling_deviceCount[driverIdx], sizeof(zes_fabric_port_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get fabric ports for each device + res = + ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_fabricPortCount[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); + _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; + 
continue; + } + _sampling_hFabricPort[driverIdx][deviceIdx] = (zes_fabric_port_handle_t *)calloc( + _sampling_fabricPortCount[driverIdx][deviceIdx], sizeof(zes_fabric_port_handle_t)); + res = ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_fabricPortCount[driverIdx][deviceIdx], + _sampling_hFabricPort[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); + _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; + free(_sampling_hFabricPort[driverIdx][deviceIdx]); + } + for (uint32_t fabricPortIdx = 0; + fabricPortIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; ++fabricPortIdx) { + + zes_fabric_port_properties_t fabricPortProps = {0}; + res = ZES_FABRIC_PORT_GET_PROPERTIES_PTR( + _sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], &fabricPortProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_PROPERTIES_PTR", res); + } + // Dump fabricPortProperties once + do_tracepoint( + lttng_ust_ze_sampling, fabricPortProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], + &fabricPortProps); + } + } + } + _sampling_fabricPorts_initialized = 1; +} + + +static void intializeMemModules() { + ze_result_t res; + _sampling_hMemModule = + (zes_mem_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_mem_handle_t **)); + _sampling_memModuleCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + _sampling_memModuleCount[driverIdx] = + (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); + _sampling_hMemModule[driverIdx] = + (zes_mem_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_mem_handle_t *)); + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + // Get fabric 
ports for each device + res = + ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_memModuleCount[driverIdx][deviceIdx], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); + _sampling_memModuleCount[driverIdx][deviceIdx] = 0; + continue; + } + _sampling_hMemModule[driverIdx][deviceIdx] = (zes_mem_handle_t *)calloc( + _sampling_memModuleCount[driverIdx][deviceIdx], sizeof(zes_mem_handle_t)); + res = ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], + &_sampling_memModuleCount[driverIdx][deviceIdx], + _sampling_hMemModule[driverIdx][deviceIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); + _sampling_memModuleCount[driverIdx][deviceIdx] = 0; + free(_sampling_hMemModule[driverIdx][deviceIdx]); + } + for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; + ++memModuleIdx) { + zes_mem_properties_t memProps = {0}; + memProps.stype = ZES_STRUCTURE_TYPE_MEM_PROPERTIES; + res = ZES_MEMORY_GET_PROPERTIES_PTR( + _sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], &memProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_PROPERTIES_PTR", res); + } + // Dump fabricPortProperties once + do_tracepoint(lttng_ust_ze_sampling, memoryProperties, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memProps); + } + } + } + _sampling_memModules_initialized = 1; +} + + +static int initializeHandles() { + ze_result_t res; + //find_ze_symbols(handle, NULL); + res = ZES_INIT_PTR(0); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_INIT_PTR", res); + return -1; + } + + // Query driver + _sampling_driverCount = 0; + res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DRIVER_GET_PTR", res); + return 
-1; + } + _sampling_hDrivers = + (zes_driver_handle_t *)calloc(_sampling_driverCount, sizeof(zes_driver_handle_t)); + res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, _sampling_hDrivers); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DRIVER_GET_PTR", res); + return -1; + } + _sampling_deviceCount = (uint32_t *)calloc(_sampling_driverCount, sizeof(uint32_t)); + _sampling_hDevices = + (zes_device_handle_t **)calloc(_sampling_driverCount, sizeof(zes_device_handle_t *)); + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + res = + ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount[driverIdx] == 0) { + fprintf(stderr, "ERROR: No device found!\n"); + _ZE_ERROR_MSG("1st ZES_DEVICE_GET_PTR", res); + return -1; + } + _sampling_hDevices[driverIdx] = (zes_device_handle_t *)calloc(_sampling_deviceCount[driverIdx], + sizeof(zes_device_handle_t)); + res = ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], + _sampling_hDevices[driverIdx]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_GET_PTR", res); + free(_sampling_hDevices[driverIdx]); + return -1; + } + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + + zes_device_properties_t deviceProps = {0}; + deviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; + deviceProps.pNext = NULL; + res = ZES_DEVICE_GET_PROPERTIES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &deviceProps); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_DEVICE_GET_PROPERTIES_PTR", res); + } + do_tracepoint(lttng_ust_ze_sampling, deviceProperties, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, + &deviceProps); + } + } + intializeFrequency(); + intializePower(); + intializeEngines(); + intializeFabricPorts(); + intializeMemModules(); + return 0; +} + +static void readFrequency_dump(uint32_t 
driverIdx, uint32_t deviceIdx) { + if (!_sampling_freq_initialized) + return; + ze_result_t result; + for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_freq_state_t freqState = {0}; + result = ZES_FREQUENCY_GET_STATE_PTR(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + &freqState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FREQUENCY_GET_STATE_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], + domainIdx, &freqState); + } +} + +static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_fabricPorts_initialized) + return; + ze_result_t result; + for (uint32_t portIdx = 0; portIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; portIdx++) { + zes_fabric_port_state_t portState = {0}; + portState.pNext = NULL; + portState.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_STATE; + result = ZES_FABRIC_PORT_GET_STATE_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], + &portState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_STATE_PTR", result); + continue; + } + zes_fabric_port_throughput_t throughput = {0}; + result = ZES_FABRIC_PORT_GET_THROUGHPUT_PTR( + _sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &throughput); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_THROUGHPUT_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, fabricPort, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], + &portState, &throughput); + } +} + + +static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_memModules_initialized) + return; + ze_result_t result; + for (uint32_t memModuleIdx = 0; 
memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; + ++memModuleIdx) { + zes_mem_state_t memState = {0}; + memState.stype = ZES_STRUCTURE_TYPE_MEM_STATE; + zes_mem_bandwidth_t memBandwidth = {0}; + result = ZES_MEMORY_GET_STATE_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_STATE_PTR", result); + continue; + } + result = ZES_MEMORY_GET_BANDWIDTH_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memBandwidth); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_MEMORY_GET_BANDWIDTH_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, memStats, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], + &memState, &memBandwidth); + } +} + + +static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_pwr_initialized) + return; + ze_result_t result; + for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; + domainIdx++) { + zes_power_energy_counter_t energyCounter = {0}; + result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], + &energyCounter); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, gpu_energy, + (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], domainIdx, + &energyCounter); + } +} + +static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx) { + if (!_sampling_engines_initialized) + return; + ze_result_t result; + for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; + ++engineIdx) { + zes_engine_stats_t engineStats = {0}; + result = 
ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineStats); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); + continue; + } + do_tracepoint(lttng_ust_ze_sampling, engineStats, + (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], + (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], + &engineStats); + } +} + + +static void thapi_sampling_energy() { + for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { + for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)) { + readFrequency_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)) { + readEnergy_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, engineStats)) { + readEngines_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { + readFabricPorts_dump(driverIdx, deviceIdx); + } + if (tracepoint_enabled(lttng_ust_ze_sampling, memStats)) { + readMemModules_dump(driverIdx, deviceIdx); + } + } + } +} + +void process_sampling() { + + struct timespec interval; + interval.tv_sec = 0; + interval.tv_nsec = 50000000; // 50ms interval + thapi_sampling_energy(); + _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); + +} +void cleanup_sampling() { + if (_sampling_handle) { + thapi_unregister_sampling(_sampling_handle); + _sampling_handle = NULL; + } +} + +// Signal handling loop +int signal_loop(int parent_pid) { + // Initialize signal set and add signals + sigset_t signal_set; + sigemptyset(&signal_set); + sigaddset(&signal_set, RT_SIGNAL_SAMPLING_READY); + sigaddset(&signal_set, RT_SIGNAL_SAMPLING_FINISH); + + // Block signals + sigprocmask(SIG_BLOCK, &signal_set, NULL); + + // Signal the parent process READY + kill(parent_pid, 
RT_SIGNAL_SAMPLING_READY); + + // Processing loop: until RT_SIGNAL_FINISH + while (1) { + int signum; + sigwait(&signal_set, &signum); + + if (signum == RT_SIGNAL_SAMPLING_FINISH) { + return 0; + } else { + // Example action when READY signal is received + process_sampling(); + printf("Starting \n"); + kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Signal parent + } + } + + // Unreachable + fprintf(stderr, "Exited signal loop unexpectedly.\n"); + return 1; +} + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + int parent_pid = atoi(argv[1]); + int verbose = 0; + thapi_sampling_init(); + // Load necessary libraries + void *handle = NULL; + char *s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); + if (s) { + handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } else { + handle = dlopen("libze_loader.so", RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } + + if (!handle) { + fprintf(stderr, "Failure: could not load ze library!\n"); + return 1; + } + + // Initialize daemon + if (getenv("LTTNG_UST_SAMPLING_ENERGY")) { + find_ze_symbols(handle, verbose); + initializeHandles(); + } else { + fprintf(stderr, "Sampling not enabled. 
Exiting.\n"); + dlclose(handle); + return 0; + } + // Run the signal loop + int ret = signal_loop(parent_pid); + // Cleanup before exiting + cleanup_sampling(); + dlclose(handle); + printf("Daemon exiting with status %d\n", ret); + kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Notify parent of clean exit + return ret; +} diff --git a/ze/sampling_daemon.h b/ze/sampling_daemon.h new file mode 100644 index 00000000..4f4cd6ed --- /dev/null +++ b/ze/sampling_daemon.h @@ -0,0 +1,7 @@ +#ifndef SAMPLING_DAEMON_H +#define SAMPLING_DAEMON_H + +void initialize_sampling(); +void cleanup_sampling(); + +#endif // SAMPLING_DAEMON_H \ No newline at end of file diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 8a18f161..ebdc7369 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -1,4 +1,5 @@ -#include "thapi_sampling.h" +//#include "thapi_sampling.h" +//#include "sampling_daemon.h" #ifdef THAPI_DEBUG #define TAHPI_LOG stderr @@ -40,7 +41,7 @@ static int _do_cleanup = 0; static int _do_chained_structs = 0; static int _do_paranoid_drift = 0; static int _do_paranoid_memory_location = 0; -thapi_sampling_handle_t _sampling_handle = NULL; +//thapi_sampling_handle_t _sampling_handle = NULL; pthread_mutex_t ze_closures_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -641,9 +642,7 @@ _lib_cleanup() { if (_do_cleanup) { if (_do_profile) _event_cleanup(); - if (_sampling_handle) - thapi_unregister_sampling(_sampling_handle); - } + } } static void _dump_driver_subdevice_properties(ze_driver_handle_t hDriver, ze_device_handle_t hDevice) { @@ -789,484 +788,14 @@ static inline void _dump_memory_info(ze_command_list_handle_t hCommandList, cons {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ } while (0) -static int _sampling_freq_initialized = 0; -static int _sampling_fabricPorts_initialized = 0; -static int _sampling_memModules_initialized = 0; -static int _sampling_pwr_initialized = 0; -static int 
_sampling_engines_initialized = 0; -// Static handles to stay throughout the execution -static zes_driver_handle_t *_sampling_hDrivers = NULL; -static zes_device_handle_t **_sampling_hDevices = NULL; -static zes_freq_handle_t ***_sampling_hFrequencies = NULL; -static zes_pwr_handle_t ***_sampling_hPowers = NULL; -static zes_engine_handle_t ***_sampling_engineHandles = NULL; -static zes_fabric_port_handle_t ***_sampling_hFabricPort = NULL; -static zes_mem_handle_t ***_sampling_hMemModule = NULL; -static uint32_t _sampling_driverCount = 0; -static uint32_t *_sampling_deviceCount = NULL; -static uint32_t **_sampling_freqDomainCounts = NULL; -static uint32_t **_sampling_fabricPortCount = NULL; -static uint32_t **_sampling_memModuleCount = NULL; -static uint32_t **_sampling_powerDomainCounts = NULL; -static uint32_t **_sampling_engineCounts = NULL; - -static void intializeFrequency() { - ze_result_t res; - _sampling_hFrequencies = - (zes_freq_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_freq_handle_t **)); - _sampling_freqDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_freqDomainCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hFrequencies[driverIdx] = - (zes_freq_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_freq_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get frequency domains for each device - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_freqDomainCounts[driverIdx][deviceIdx], - NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hFrequencies[driverIdx][deviceIdx] = (zes_freq_handle_t *)calloc( - 
_sampling_freqDomainCounts[driverIdx][deviceIdx], sizeof(zes_freq_handle_t)); - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_freqDomainCounts[driverIdx][deviceIdx], - _sampling_hFrequencies[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; - free(_sampling_hFrequencies[driverIdx][deviceIdx]); - } - for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_freq_properties_t freqProps = {0}; - freqProps.stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; - res = ZES_FREQUENCY_GET_PROPERTIES_PTR( - _sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], &freqProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FREQUENCY_GET_PROPERTIES_PTR", res); - free(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx]); - } - do_tracepoint(lttng_ust_ze_sampling, freqProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - &freqProps); - } - } - } - _sampling_freq_initialized = 1; -} - -static void intializePower() { - ze_result_t res; - _sampling_hPowers = - (zes_pwr_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_pwr_handle_t **)); - _sampling_powerDomainCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_hPowers[driverIdx] = - (zes_pwr_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_pwr_handle_t *)); - _sampling_powerDomainCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get power domains for each device - res = 
ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_powerDomainCounts[driverIdx][deviceIdx], - NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hPowers[driverIdx][deviceIdx] = (zes_pwr_handle_t *)calloc( - _sampling_powerDomainCounts[driverIdx][deviceIdx], sizeof(zes_pwr_handle_t)); - res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_powerDomainCounts[driverIdx][deviceIdx], - _sampling_hPowers[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; - free(_sampling_hPowers[driverIdx][deviceIdx]); - } - for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_power_properties_t powerProperties = {0}; - powerProperties.stype = ZES_STRUCTURE_TYPE_POWER_PROPERTIES; - res = ZES_POWER_GET_PROPERTIES_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &powerProperties); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_POWER_GET_PROPERTIES_PTR", res); - free(_sampling_hPowers[driverIdx][deviceIdx][domainIdx]); - } - do_tracepoint(lttng_ust_ze_sampling, powerProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &powerProperties); - } - } - } - _sampling_pwr_initialized = 1; -} - -static void intializeEngines() { - ze_result_t res; - _sampling_engineHandles = - (zes_engine_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_engine_handle_t **)); - _sampling_engineCounts = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_engineHandles[driverIdx] = (zes_engine_handle_t 
**)calloc( - _sampling_deviceCount[driverIdx], sizeof(zes_engine_handle_t *)); - _sampling_engineCounts[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get engine counts for each device - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_engineCounts[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[driverIdx][deviceIdx] == 0) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - _sampling_engineCounts[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_engineHandles[driverIdx][deviceIdx] = (zes_engine_handle_t *)calloc( - _sampling_engineCounts[driverIdx][deviceIdx], sizeof(zes_engine_handle_t)); - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_engineCounts[driverIdx][deviceIdx], - _sampling_engineHandles[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - _sampling_engineCounts[driverIdx][deviceIdx] = 0; - free(_sampling_engineHandles[driverIdx][deviceIdx]); - } - for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; - ++engineIdx) { - zes_engine_properties_t engineProps = {0}; - engineProps.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; - res = ZES_ENGINE_GET_PROPERTIES_PTR( - _sampling_engineHandles[driverIdx][deviceIdx][engineIdx], &engineProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", res); - } - do_tracepoint(lttng_ust_ze_sampling, engineProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineProps); - } - } - } - _sampling_engines_initialized = 1; -} - -static void intializeFabricPorts() { - ze_result_t res; - 
_sampling_hFabricPort = (zes_fabric_port_handle_t ***)calloc(_sampling_driverCount, - sizeof(zes_fabric_port_handle_t **)); - _sampling_fabricPortCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_fabricPortCount[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hFabricPort[driverIdx] = (zes_fabric_port_handle_t **)calloc( - _sampling_deviceCount[driverIdx], sizeof(zes_fabric_port_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get fabric ports for each device - res = - ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_fabricPortCount[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); - _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hFabricPort[driverIdx][deviceIdx] = (zes_fabric_port_handle_t *)calloc( - _sampling_fabricPortCount[driverIdx][deviceIdx], sizeof(zes_fabric_port_handle_t)); - res = ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_fabricPortCount[driverIdx][deviceIdx], - _sampling_hFabricPort[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); - _sampling_fabricPortCount[driverIdx][deviceIdx] = 0; - free(_sampling_hFabricPort[driverIdx][deviceIdx]); - } - for (uint32_t fabricPortIdx = 0; - fabricPortIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; ++fabricPortIdx) { - - zes_fabric_port_properties_t fabricPortProps = {0}; - res = ZES_FABRIC_PORT_GET_PROPERTIES_PTR( - _sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], &fabricPortProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_PROPERTIES_PTR", res); - } - // Dump fabricPortProperties once 
- do_tracepoint( - lttng_ust_ze_sampling, fabricPortProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][fabricPortIdx], - &fabricPortProps); - } - } - } - _sampling_fabricPorts_initialized = 1; -} - -static void intializeMemModules() { - ze_result_t res; - _sampling_hMemModule = - (zes_mem_handle_t ***)calloc(_sampling_driverCount, sizeof(zes_mem_handle_t **)); - _sampling_memModuleCount = (uint32_t **)calloc(_sampling_driverCount, sizeof(uint32_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_memModuleCount[driverIdx] = - (uint32_t *)calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); - _sampling_hMemModule[driverIdx] = - (zes_mem_handle_t **)calloc(_sampling_deviceCount[driverIdx], sizeof(zes_mem_handle_t *)); - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // Get fabric ports for each device - res = - ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_memModuleCount[driverIdx][deviceIdx], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); - _sampling_memModuleCount[driverIdx][deviceIdx] = 0; - continue; - } - _sampling_hMemModule[driverIdx][deviceIdx] = (zes_mem_handle_t *)calloc( - _sampling_memModuleCount[driverIdx][deviceIdx], sizeof(zes_mem_handle_t)); - res = ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], - &_sampling_memModuleCount[driverIdx][deviceIdx], - _sampling_hMemModule[driverIdx][deviceIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); - _sampling_memModuleCount[driverIdx][deviceIdx] = 0; - free(_sampling_hMemModule[driverIdx][deviceIdx]); - } - for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; - ++memModuleIdx) { - 
zes_mem_properties_t memProps = {0}; - memProps.stype = ZES_STRUCTURE_TYPE_MEM_PROPERTIES; - res = ZES_MEMORY_GET_PROPERTIES_PTR( - _sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], &memProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_PROPERTIES_PTR", res); - } - // Dump fabricPortProperties once - do_tracepoint(lttng_ust_ze_sampling, memoryProperties, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memProps); - } - } - } - _sampling_memModules_initialized = 1; -} - -static int initializeHandles() { - ze_result_t res; - res = ZES_INIT_PTR(0); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_INIT_PTR", res); - return -1; - } - - // Query driver - _sampling_driverCount = 0; - res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DRIVER_GET_PTR", res); - return -1; - } - _sampling_hDrivers = - (zes_driver_handle_t *)calloc(_sampling_driverCount, sizeof(zes_driver_handle_t)); - res = ZES_DRIVER_GET_PTR(&_sampling_driverCount, _sampling_hDrivers); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DRIVER_GET_PTR", res); - return -1; - } - _sampling_deviceCount = (uint32_t *)calloc(_sampling_driverCount, sizeof(uint32_t)); - _sampling_hDevices = - (zes_device_handle_t **)calloc(_sampling_driverCount, sizeof(zes_device_handle_t *)); - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - res = - ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], NULL); - if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount[driverIdx] == 0) { - fprintf(stderr, "ERROR: No device found!\n"); - _ZE_ERROR_MSG("1st ZES_DEVICE_GET_PTR", res); - return -1; - } - _sampling_hDevices[driverIdx] = (zes_device_handle_t *)calloc(_sampling_deviceCount[driverIdx], - sizeof(zes_device_handle_t)); - res = 
ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], - _sampling_hDevices[driverIdx]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_GET_PTR", res); - free(_sampling_hDevices[driverIdx]); - return -1; - } - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - - zes_device_properties_t deviceProps = {0}; - deviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; - deviceProps.pNext = NULL; - res = ZES_DEVICE_GET_PROPERTIES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &deviceProps); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_DEVICE_GET_PROPERTIES_PTR", res); - } - do_tracepoint(lttng_ust_ze_sampling, deviceProperties, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, - &deviceProps); - } - } - intializeFrequency(); - intializePower(); - intializeEngines(); - intializeFabricPorts(); - intializeMemModules(); - return 0; -} - -static void readFrequency_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_freq_initialized) - return; - ze_result_t result; - for (uint32_t domainIdx = 0; domainIdx < _sampling_freqDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_freq_state_t freqState = {0}; - result = ZES_FREQUENCY_GET_STATE_PTR(_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - &freqState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FREQUENCY_GET_STATE_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_freq_handle_t)_sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], - domainIdx, &freqState); - } -} - -static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_fabricPorts_initialized) - return; - ze_result_t result; - for (uint32_t portIdx = 0; portIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; portIdx++) { - zes_fabric_port_state_t portState = {0}; - 
portState.pNext = NULL; - portState.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_STATE; - result = ZES_FABRIC_PORT_GET_STATE_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], - &portState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_STATE_PTR", result); - continue; - } - zes_fabric_port_throughput_t throughput = {0}; - result = ZES_FABRIC_PORT_GET_THROUGHPUT_PTR( - _sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &throughput); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_THROUGHPUT_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, fabricPort, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_fabric_port_handle_t)_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], - &portState, &throughput); - } -} -static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_memModules_initialized) - return; - ze_result_t result; - for (uint32_t memModuleIdx = 0; memModuleIdx < _sampling_memModuleCount[driverIdx][deviceIdx]; - ++memModuleIdx) { - zes_mem_state_t memState = {0}; - memState.stype = ZES_STRUCTURE_TYPE_MEM_STATE; - zes_mem_bandwidth_t memBandwidth = {0}; - result = ZES_MEMORY_GET_STATE_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memState); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_STATE_PTR", result); - continue; - } - result = ZES_MEMORY_GET_BANDWIDTH_PTR(_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memBandwidth); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_MEMORY_GET_BANDWIDTH_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, memStats, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_mem_handle_t)_sampling_hMemModule[driverIdx][deviceIdx][memModuleIdx], - &memState, &memBandwidth); - } -} - -static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_pwr_initialized) - return; - 
ze_result_t result; - for (uint32_t domainIdx = 0; domainIdx < _sampling_powerDomainCounts[driverIdx][deviceIdx]; - domainIdx++) { - zes_power_energy_counter_t energyCounter = {0}; - result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[driverIdx][deviceIdx][domainIdx], - &energyCounter); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, gpu_energy, - (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_pwr_handle_t)_sampling_hPowers[driverIdx][deviceIdx][domainIdx], domainIdx, - &energyCounter); - } -} - -static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx) { - if (!_sampling_engines_initialized) - return; - ze_result_t result; - for (uint32_t engineIdx = 0; engineIdx < _sampling_engineCounts[driverIdx][deviceIdx]; - ++engineIdx) { - zes_engine_stats_t engineStats = {0}; - result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineStats); - if (result != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); - continue; - } - do_tracepoint(lttng_ust_ze_sampling, engineStats, - (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], - (zes_engine_handle_t)_sampling_engineHandles[driverIdx][deviceIdx][engineIdx], - &engineStats); - } -} - -static void thapi_sampling_energy() { - for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)) { - readFrequency_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)) { - readEnergy_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, engineStats)) { - readEngines_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { - 
readFabricPorts_dump(driverIdx, deviceIdx); - } - if (tracepoint_enabled(lttng_ust_ze_sampling, memStats)) { - readMemModules_dump(driverIdx, deviceIdx); - } - } - } -} static void _load_tracer(void) { char *s = NULL; void *handle = NULL; int verbose = 0; - struct timespec interval; - thapi_sampling_init(); + //struct timespec interval; + //thapi_sampling_init(); s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); if (s) @@ -1318,17 +847,8 @@ static void _load_tracer(void) { if (s) _do_paranoid_memory_location = 1; - s = getenv("LTTNG_UST_SAMPLING_ENERGY"); - if (s) { - initializeHandles(); - /* TODO: make it configurable */ - interval.tv_sec = 0; - interval.tv_nsec = 50000000; - thapi_sampling_energy(); - _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); - } - _do_cleanup = 1; + #ifndef THAPI_USE_DESTRUCTORS atexit(_lib_cleanup); #endif From 2fd6b64fca31aba07aa80633e6e3f5fe5e77a47b Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Fri, 8 Nov 2024 09:32:38 -0600 Subject: [PATCH 2/8] Added correct dependencies. 
--- ze/Makefile.am | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ze/Makefile.am b/ze/Makefile.am index a866613b..adf9507d 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -153,7 +153,14 @@ BUILT_SOURCES = \ bin_PROGRAMS = sampling_daemon -sampling_daemon_SOURCES = sampling_daemon.c +sampling_daemon_SOURCES = \ + sampling_daemon.c \ + sampling_daemon.h + +nodist_sampling_daemon_SOURCES = \ + $(ZE_PROBES_INCL) \ + $(ZE_STATIC_PROBES_INCL) + sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/ze/include -I./ sampling_daemon_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS) sampling_daemon_LDADD = libzetracepoints.la -ldl -lpthread $(LTTNG_UST_LIBS) ../sampling/libThapiSampling.la From e11b7b49154763aa335347e2eff05fb81e3f8482 Mon Sep 17 00:00:00 2001 From: sbekele Date: Mon, 11 Nov 2024 20:44:36 +0000 Subject: [PATCH 3/8] sampling daemon synched --- xprof/xprof.rb.in | 47 +++-- ze/sampling_daemon.c | 419 +++++++++++++++++++++---------------------- 2 files changed, 233 insertions(+), 233 deletions(-) diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index 8dfa3daf..bb099e3e 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -375,7 +375,7 @@ end def sampling? return false unless OPTIONS[:sample] - env_fetch_first('LTTNG_UST_SAMPLING_MASTER_ONLY', default: '1') == '0' || mpi_local_master? + mpi_local_master? 
end def env_tracers @@ -738,22 +738,35 @@ def gm_rename_folder exec("mv -T #{thapi_trace_dir_tmp_root} #{thapi_trace_dir_root}") unless OPTIONS[:'trace-output'] thapi_trace_dir_root end -SIGRTMIN = 40 -RT_SIGNAL_SAMPLING_READY = SIGRTMIN -RT_SIGNAL_SAMPLING_FINISH = SIGRTMIN + 1 -def start_sampling_daemon() - puts "Started sampling daemon (PID #{Process.pid}})" - sampling_daemon_pid = spawn("sampling_daemon #{Process.pid}") + +SIGRTMIN = 34 +SIG_SAMPLING_READY = SIGRTMIN +SIG_SAMPLING_FINISH = SIGRTMIN + 1 + +def start_sampling_daemon(parent_pid) + puts "Starting sampling daemon for parent process PID #{parent_pid}" + sampling_daemon_pid = spawn("/home/sbekele/sampling_daemon/bin/sampling_daemon #{parent_pid}") Process.detach(sampling_daemon_pid) - puts "Started sampling daemon (PID #{sampling_daemon_pid }})" sampling_daemon_pid end -def stop_sampling_daemon(sampling_daemon_pid) - Process.kill(RT_SIGNAL_SAMPLING_FINISH, sampling_daemon_pid) - puts "Sent FINISH signal to sampling daemon (PID #{sampling_daemon_pid})" +# Wait for the READY signal from the sampling daemon +def wait_for_ready_signal + received_ready = false + Signal.trap(SIG_SAMPLING_READY) do + puts "Received READY signal from sampling daemon" + received_ready = true + end + sleep(0.1) while !received_ready # Wait loop until READY signal is received +end + +# Send the FINISH signal to terminate the sampling daemon +def send_finish_signal(sampling_daemon_pid) + Process.kill(SIG_SAMPLING_FINISH, sampling_daemon_pid) + puts "Sent FINISH signal to sampling daemon PID #{sampling_daemon_pid}" end -# Start, Stop lttng, amd do the on-node analsysis + + def trace_and_on_node_processing(usr_argv) def teardown_lttng(syncd, pids, sampling_daemon_pid = nil) # We need to be sure that all the local ranks are finished @@ -764,7 +777,7 @@ def trace_and_on_node_processing(usr_argv) # for the early exiting ranks return unless mpi_local_master? 
- stop_sampling_daemon(sampling_daemon_pid) if sampling_daemon_pid + #stop_sampling_daemon(sampling_daemon_pid) if sampling_daemon_pid # Stop Lttng session and babeltrace daemons lm_lttng_teardown_session if OPTIONS[:archive] @@ -792,10 +805,12 @@ def trace_and_on_node_processing(usr_argv) end syncd.local_barrier('waiting_for_lttng_setup') + if sampling? - sampling_daemon_pid = start_sampling_daemon() + sampling_daemon_pid = start_sampling_daemon(Process.pid) + puts "Started sampling daemon with PID #{sampling_daemon_pid}" + wait_for_ready_signal end - # Launch User Command begin XprofExitCode.update(launch_usr_bin(h, usr_argv), usr_argv.join(' ')) @@ -803,7 +818,7 @@ def trace_and_on_node_processing(usr_argv) teardown_lttng(syncd, pids) raise end - + send_finish_signal(sampling_daemon_pid) if sampling_daemon_pid teardown_lttng(syncd, pids, sampling_daemon_pid) return unless mpi_local_master? diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c index 60fd48b3..0b118266 100644 --- a/ze/sampling_daemon.c +++ b/ze/sampling_daemon.c @@ -1,39 +1,38 @@ -#include -#include -#include "ze.h.include" -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "sampling_daemon.h" +#include "../sampling/thapi_sampling.h" #include "uthash.h" #include "utlist.h" - +#include "ze.h.include" +#include "ze_build.h" +#include "ze_profiling.h" +#include "ze_properties.h" +#include "ze_sampling.h" +#include "ze_structs_tracepoints.h" #include "ze_tracepoints.h" -#include "zet_tracepoints.h" -#include "zes_tracepoints.h" +#include "zel_structs_tracepoints.h" #include "zel_tracepoints.h" -#include "zex_tracepoints.h" -#include "ze_structs_tracepoints.h" -#include "zet_structs_tracepoints.h" #include "zes_structs_tracepoints.h" -#include "zel_structs_tracepoints.h" +#include "zes_tracepoints.h" +#include "zet_structs_tracepoints.h" +#include "zet_tracepoints.h" #include "zex_structs_tracepoints.h" -#include "ze_sampling.h" -#include 
"ze_profiling.h" -#include "ze_properties.h" -#include "ze_build.h" -#include "sampling_daemon.h" -#include "../sampling/thapi_sampling.h" -#include +#include "zex_tracepoints.h" +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#define RT_SIGNAL_SAMPLING_READY SIGRTMIN -#define RT_SIGNAL_SAMPLING_FINISH SIGRTMIN + 1 +#define SIG_SAMPLING_READY SIGRTMIN +#define SIG_SAMPLING_FINISH (SIGRTMIN + 1) #define ZES_INIT_PTR zesInit_ptr @@ -43,30 +42,24 @@ #define ZES_DEVICE_GET_PROPERTIES_PTR zesDeviceGetProperties_ptr - #define ZES_DEVICE_ENUM_POWER_DOMAINS_PTR zesDeviceEnumPowerDomains_ptr #define ZES_POWER_GET_PROPERTIES_PTR zesPowerGetProperties_ptr #define ZES_POWER_GET_ENERGY_COUNTER_PTR zesPowerGetEnergyCounter_ptr - - #define ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR zesDeviceEnumFrequencyDomains_ptr #define ZES_FREQUENCY_GET_PROPERTIES_PTR zesFrequencyGetProperties_ptr #define ZES_FREQUENCY_GET_STATE_PTR zesFrequencyGetState_ptr - #define ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR zesDeviceEnumEngineGroups_ptr #define ZES_ENGINE_GET_PROPERTIES_PTR zesEngineGetProperties_ptr #define ZES_ENGINE_GET_ACTIVITY_PTR zesEngineGetActivity_ptr - - #define ZES_DEVICE_ENUM_FABRIC_PORTS_PTR zesDeviceEnumFabricPorts_ptr #define ZES_FABRIC_PORT_GET_PROPERTIES_PTR zesFabricPortGetProperties_ptr @@ -75,8 +68,6 @@ #define ZES_FABRIC_PORT_GET_THROUGHPUT_PTR zesFabricPortGetThroughput_ptr - - #define ZES_DEVICE_ENUM_MEMORY_MODULES_PTR zesDeviceEnumMemoryModules_ptr #define ZES_MEMORY_GET_PROPERTIES_PTR zesMemoryGetProperties_ptr @@ -85,83 +76,91 @@ #define ZES_MEMORY_GET_BANDWIDTH_PTR zesMemoryGetBandwidth_ptr - - typedef ze_result_t (*zesInit_t)(zes_init_flags_t flags); -static zesInit_t ZES_INIT_PTR = (void *) 0x0; +static zesInit_t ZES_INIT_PTR = (void *)0x0; typedef ze_result_t (*zesDriverGet_t)(uint32_t *pCount, zes_driver_handle_t *phDrivers); -static zesDriverGet_t ZES_DRIVER_GET_PTR = (void *) 0x0; - -typedef 
ze_result_t (*zesDeviceGet_t)(zes_driver_handle_t hDriver, uint32_t *pCount, zes_device_handle_t *phDevices); -static zesDeviceGet_t ZES_DEVICE_GET_PTR = (void *) 0x0; - -typedef ze_result_t (*zesDeviceGetProperties_t)(zes_device_handle_t hDevice, zes_device_properties_t *pProperties); -static zesDeviceGetProperties_t ZES_DEVICE_GET_PROPERTIES_PTR = (void *) 0x0; - - +static zesDriverGet_t ZES_DRIVER_GET_PTR = (void *)0x0; -typedef ze_result_t (*zesDeviceEnumPowerDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_pwr_handle_t *phPower); -static zesDeviceEnumPowerDomains_t ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceGet_t)(zes_driver_handle_t hDriver, uint32_t *pCount, + zes_device_handle_t *phDevices); +static zesDeviceGet_t ZES_DEVICE_GET_PTR = (void *)0x0; -typedef ze_result_t (*zesPowerGetProperties_t)(zes_pwr_handle_t hPower, zes_power_properties_t *pProperties); -static zesPowerGetProperties_t ZES_POWER_GET_PROPERTIES_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceGetProperties_t)(zes_device_handle_t hDevice, + zes_device_properties_t *pProperties); +static zesDeviceGetProperties_t ZES_DEVICE_GET_PROPERTIES_PTR = (void *)0x0; -typedef ze_result_t (*zesPowerGetEnergyCounter_t)(zes_pwr_handle_t hPower, zes_power_energy_counter_t *pEnergy); -static zesPowerGetEnergyCounter_t ZES_POWER_GET_ENERGY_COUNTER_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceEnumPowerDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, + zes_pwr_handle_t *phPower); +static zesDeviceEnumPowerDomains_t ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (void *)0x0; +typedef ze_result_t (*zesPowerGetProperties_t)(zes_pwr_handle_t hPower, + zes_power_properties_t *pProperties); +static zesPowerGetProperties_t ZES_POWER_GET_PROPERTIES_PTR = (void *)0x0; +typedef ze_result_t (*zesPowerGetEnergyCounter_t)(zes_pwr_handle_t hPower, + zes_power_energy_counter_t *pEnergy); +static zesPowerGetEnergyCounter_t ZES_POWER_GET_ENERGY_COUNTER_PTR = (void 
*)0x0; -typedef ze_result_t (*zesDeviceEnumFrequencyDomains_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_freq_handle_t *phFrequency); -static zesDeviceEnumFrequencyDomains_t ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceEnumFrequencyDomains_t)(zes_device_handle_t hDevice, + uint32_t *pCount, + zes_freq_handle_t *phFrequency); +static zesDeviceEnumFrequencyDomains_t ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (void *)0x0; -typedef ze_result_t (*zesFrequencyGetProperties_t)(zes_freq_handle_t hFrequency, zes_freq_properties_t *pProperties); -static zesFrequencyGetProperties_t ZES_FREQUENCY_GET_PROPERTIES_PTR = (void *) 0x0; +typedef ze_result_t (*zesFrequencyGetProperties_t)(zes_freq_handle_t hFrequency, + zes_freq_properties_t *pProperties); +static zesFrequencyGetProperties_t ZES_FREQUENCY_GET_PROPERTIES_PTR = (void *)0x0; -typedef ze_result_t (*zesFrequencyGetState_t)(zes_freq_handle_t hFrequency, zes_freq_state_t *pState); -static zesFrequencyGetState_t ZES_FREQUENCY_GET_STATE_PTR = (void *) 0x0; +typedef ze_result_t (*zesFrequencyGetState_t)(zes_freq_handle_t hFrequency, + zes_freq_state_t *pState); +static zesFrequencyGetState_t ZES_FREQUENCY_GET_STATE_PTR = (void *)0x0; +typedef ze_result_t (*zesDeviceEnumEngineGroups_t)(zes_device_handle_t hDevice, uint32_t *pCount, + zes_engine_handle_t *phEngine); +static zesDeviceEnumEngineGroups_t ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (void *)0x0; +typedef ze_result_t (*zesEngineGetProperties_t)(zes_engine_handle_t hEngine, + zes_engine_properties_t *pProperties); +static zesEngineGetProperties_t ZES_ENGINE_GET_PROPERTIES_PTR = (void *)0x0; -typedef ze_result_t (*zesDeviceEnumEngineGroups_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_engine_handle_t *phEngine); -static zesDeviceEnumEngineGroups_t ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (void *) 0x0; +typedef ze_result_t (*zesEngineGetActivity_t)(zes_engine_handle_t hEngine, + zes_engine_stats_t *pStats); +static 
zesEngineGetActivity_t ZES_ENGINE_GET_ACTIVITY_PTR = (void *)0x0; -typedef ze_result_t (*zesEngineGetProperties_t)(zes_engine_handle_t hEngine, zes_engine_properties_t *pProperties); -static zesEngineGetProperties_t ZES_ENGINE_GET_PROPERTIES_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceEnumFabricPorts_t)(zes_device_handle_t hDevice, uint32_t *pCount, + zes_fabric_port_handle_t *phPort); +static zesDeviceEnumFabricPorts_t ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (void *)0x0; -typedef ze_result_t (*zesEngineGetActivity_t)(zes_engine_handle_t hEngine, zes_engine_stats_t *pStats); -static zesEngineGetActivity_t ZES_ENGINE_GET_ACTIVITY_PTR = (void *) 0x0; +typedef ze_result_t (*zesFabricPortGetProperties_t)(zes_fabric_port_handle_t hPort, + zes_fabric_port_properties_t *pProperties); +static zesFabricPortGetProperties_t ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (void *)0x0; +typedef ze_result_t (*zesFabricPortGetState_t)(zes_fabric_port_handle_t hPort, + zes_fabric_port_state_t *pState); +static zesFabricPortGetState_t ZES_FABRIC_PORT_GET_STATE_PTR = (void *)0x0; +typedef ze_result_t (*zesFabricPortGetThroughput_t)(zes_fabric_port_handle_t hPort, + zes_fabric_port_throughput_t *pThroughput); +static zesFabricPortGetThroughput_t ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (void *)0x0; -typedef ze_result_t (*zesDeviceEnumFabricPorts_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_fabric_port_handle_t *phPort); -static zesDeviceEnumFabricPorts_t ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (void *) 0x0; +typedef ze_result_t (*zesDeviceEnumMemoryModules_t)(zes_device_handle_t hDevice, uint32_t *pCount, + zes_mem_handle_t *phMemory); +static zesDeviceEnumMemoryModules_t ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (void *)0x0; -typedef ze_result_t (*zesFabricPortGetProperties_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_properties_t *pProperties); -static zesFabricPortGetProperties_t ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (void *) 0x0; - -typedef ze_result_t 
(*zesFabricPortGetState_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_state_t *pState); -static zesFabricPortGetState_t ZES_FABRIC_PORT_GET_STATE_PTR = (void *) 0x0; - -typedef ze_result_t (*zesFabricPortGetThroughput_t)(zes_fabric_port_handle_t hPort, zes_fabric_port_throughput_t *pThroughput); -static zesFabricPortGetThroughput_t ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (void *) 0x0; - - -typedef ze_result_t (*zesDeviceEnumMemoryModules_t)(zes_device_handle_t hDevice, uint32_t *pCount, zes_mem_handle_t *phMemory); -static zesDeviceEnumMemoryModules_t ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (void *) 0x0; - -typedef ze_result_t (*zesMemoryGetProperties_t)(zes_mem_handle_t hMemory, zes_mem_properties_t *pProperties); -static zesMemoryGetProperties_t ZES_MEMORY_GET_PROPERTIES_PTR = (void *) 0x0; +typedef ze_result_t (*zesMemoryGetProperties_t)(zes_mem_handle_t hMemory, + zes_mem_properties_t *pProperties); +static zesMemoryGetProperties_t ZES_MEMORY_GET_PROPERTIES_PTR = (void *)0x0; typedef ze_result_t (*zesMemoryGetState_t)(zes_mem_handle_t hMemory, zes_mem_state_t *pState); -static zesMemoryGetState_t ZES_MEMORY_GET_STATE_PTR = (void *) 0x0; +static zesMemoryGetState_t ZES_MEMORY_GET_STATE_PTR = (void *)0x0; -typedef ze_result_t (*zesMemoryGetBandwidth_t)(zes_mem_handle_t hMemory, zes_mem_bandwidth_t *pBandwidth); -static zesMemoryGetBandwidth_t ZES_MEMORY_GET_BANDWIDTH_PTR = (void *) 0x0; +typedef ze_result_t (*zesMemoryGetBandwidth_t)(zes_mem_handle_t hMemory, + zes_mem_bandwidth_t *pBandwidth); +static zesMemoryGetBandwidth_t ZES_MEMORY_GET_BANDWIDTH_PTR = (void *)0x0; -static void find_ze_symbols(void * handle, int verbose) { +static void find_ze_symbols(void *handle, int verbose) { - ZES_INIT_PTR = (zesInit_t)(intptr_t)dlsym(handle, "zesInit"); + ZES_INIT_PTR = (zesInit_t)(intptr_t)dlsym(handle, "zesInit"); if (!ZES_INIT_PTR && verbose) fprintf(stderr, "Missing symbol zesInit!\n"); @@ -169,81 +168,87 @@ static void find_ze_symbols(void * handle, int verbose) { 
if (!ZES_DRIVER_GET_PTR && verbose) fprintf(stderr, "Missing symbol zesDriverGet!\n"); -ZES_DEVICE_GET_PTR = (zesDeviceGet_t)(intptr_t)dlsym(handle, "zesDeviceGet"); + ZES_DEVICE_GET_PTR = (zesDeviceGet_t)(intptr_t)dlsym(handle, "zesDeviceGet"); if (!ZES_DEVICE_GET_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceGet!\n"); - ZES_DEVICE_GET_PROPERTIES_PTR = (zesDeviceGetProperties_t)(intptr_t)dlsym(handle, "zesDeviceGetProperties"); + ZES_DEVICE_GET_PROPERTIES_PTR = + (zesDeviceGetProperties_t)(intptr_t)dlsym(handle, "zesDeviceGetProperties"); if (!ZES_DEVICE_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceGetProperties!\n"); - - -ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = (zesDeviceEnumPowerDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumPowerDomains"); + ZES_DEVICE_ENUM_POWER_DOMAINS_PTR = + (zesDeviceEnumPowerDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumPowerDomains"); if (!ZES_DEVICE_ENUM_POWER_DOMAINS_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceEnumPowerDomains!\n"); -ZES_POWER_GET_PROPERTIES_PTR = (zesPowerGetProperties_t)(intptr_t)dlsym(handle, "zesPowerGetProperties"); + ZES_POWER_GET_PROPERTIES_PTR = + (zesPowerGetProperties_t)(intptr_t)dlsym(handle, "zesPowerGetProperties"); if (!ZES_POWER_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesPowerGetProperties!\n"); - ZES_POWER_GET_ENERGY_COUNTER_PTR = (zesPowerGetEnergyCounter_t)(intptr_t)dlsym(handle, "zesPowerGetEnergyCounter"); + ZES_POWER_GET_ENERGY_COUNTER_PTR = + (zesPowerGetEnergyCounter_t)(intptr_t)dlsym(handle, "zesPowerGetEnergyCounter"); if (!ZES_POWER_GET_ENERGY_COUNTER_PTR && verbose) fprintf(stderr, "Missing symbol zesPowerGetEnergyCounter!\n"); - - -ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = (zesDeviceEnumFrequencyDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumFrequencyDomains"); + ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR = + (zesDeviceEnumFrequencyDomains_t)(intptr_t)dlsym(handle, "zesDeviceEnumFrequencyDomains"); if 
(!ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceEnumFrequencyDomains!\n"); - ZES_FREQUENCY_GET_PROPERTIES_PTR = (zesFrequencyGetProperties_t)(intptr_t)dlsym(handle, "zesFrequencyGetProperties"); + ZES_FREQUENCY_GET_PROPERTIES_PTR = + (zesFrequencyGetProperties_t)(intptr_t)dlsym(handle, "zesFrequencyGetProperties"); if (!ZES_FREQUENCY_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesFrequencyGetProperties!\n"); - ZES_FREQUENCY_GET_STATE_PTR = (zesFrequencyGetState_t)(intptr_t)dlsym(handle, "zesFrequencyGetState"); + ZES_FREQUENCY_GET_STATE_PTR = + (zesFrequencyGetState_t)(intptr_t)dlsym(handle, "zesFrequencyGetState"); if (!ZES_FREQUENCY_GET_STATE_PTR && verbose) fprintf(stderr, "Missing symbol zesFrequencyGetState!\n"); - - -ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = (zesDeviceEnumEngineGroups_t)(intptr_t)dlsym(handle, "zesDeviceEnumEngineGroups"); + ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR = + (zesDeviceEnumEngineGroups_t)(intptr_t)dlsym(handle, "zesDeviceEnumEngineGroups"); if (!ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceEnumEngineGroups!\n"); - ZES_ENGINE_GET_PROPERTIES_PTR = (zesEngineGetProperties_t)(intptr_t)dlsym(handle, "zesEngineGetProperties"); + ZES_ENGINE_GET_PROPERTIES_PTR = + (zesEngineGetProperties_t)(intptr_t)dlsym(handle, "zesEngineGetProperties"); if (!ZES_ENGINE_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesEngineGetProperties!\n"); - ZES_ENGINE_GET_ACTIVITY_PTR = (zesEngineGetActivity_t)(intptr_t)dlsym(handle, "zesEngineGetActivity"); + ZES_ENGINE_GET_ACTIVITY_PTR = + (zesEngineGetActivity_t)(intptr_t)dlsym(handle, "zesEngineGetActivity"); if (!ZES_ENGINE_GET_ACTIVITY_PTR && verbose) fprintf(stderr, "Missing symbol zesEngineGetActivity!\n"); - - -ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = (zesDeviceEnumFabricPorts_t)(intptr_t)dlsym(handle, "zesDeviceEnumFabricPorts"); + ZES_DEVICE_ENUM_FABRIC_PORTS_PTR = + 
(zesDeviceEnumFabricPorts_t)(intptr_t)dlsym(handle, "zesDeviceEnumFabricPorts"); if (!ZES_DEVICE_ENUM_FABRIC_PORTS_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceEnumFabricPorts!\n"); - ZES_FABRIC_PORT_GET_PROPERTIES_PTR = (zesFabricPortGetProperties_t)(intptr_t)dlsym(handle, "zesFabricPortGetProperties"); + ZES_FABRIC_PORT_GET_PROPERTIES_PTR = + (zesFabricPortGetProperties_t)(intptr_t)dlsym(handle, "zesFabricPortGetProperties"); if (!ZES_FABRIC_PORT_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesFabricPortGetProperties!\n"); -ZES_FABRIC_PORT_GET_STATE_PTR = (zesFabricPortGetState_t)(intptr_t)dlsym(handle, "zesFabricPortGetState"); + ZES_FABRIC_PORT_GET_STATE_PTR = + (zesFabricPortGetState_t)(intptr_t)dlsym(handle, "zesFabricPortGetState"); if (!ZES_FABRIC_PORT_GET_STATE_PTR && verbose) fprintf(stderr, "Missing symbol zesFabricPortGetState!\n"); - ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = (zesFabricPortGetThroughput_t)(intptr_t)dlsym(handle, "zesFabricPortGetThroughput"); + ZES_FABRIC_PORT_GET_THROUGHPUT_PTR = + (zesFabricPortGetThroughput_t)(intptr_t)dlsym(handle, "zesFabricPortGetThroughput"); if (!ZES_FABRIC_PORT_GET_THROUGHPUT_PTR && verbose) fprintf(stderr, "Missing symbol zesFabricPortGetThroughput!\n"); - - -ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (zesDeviceEnumMemoryModules_t)(intptr_t)dlsym(handle, "zesDeviceEnumMemoryModules"); + ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = + (zesDeviceEnumMemoryModules_t)(intptr_t)dlsym(handle, "zesDeviceEnumMemoryModules"); if (!ZES_DEVICE_ENUM_MEMORY_MODULES_PTR && verbose) fprintf(stderr, "Missing symbol zesDeviceEnumMemoryModules!\n"); - ZES_MEMORY_GET_PROPERTIES_PTR = (zesMemoryGetProperties_t)(intptr_t)dlsym(handle, "zesMemoryGetProperties"); + ZES_MEMORY_GET_PROPERTIES_PTR = + (zesMemoryGetProperties_t)(intptr_t)dlsym(handle, "zesMemoryGetProperties"); if (!ZES_MEMORY_GET_PROPERTIES_PTR && verbose) fprintf(stderr, "Missing symbol zesMemoryGetProperties!\n"); @@ -251,10 +256,10 @@ 
ZES_DEVICE_ENUM_MEMORY_MODULES_PTR = (zesDeviceEnumMemoryModules_t)(intptr_t)dls if (!ZES_MEMORY_GET_STATE_PTR && verbose) fprintf(stderr, "Missing symbol zesMemoryGetState!\n"); - ZES_MEMORY_GET_BANDWIDTH_PTR = (zesMemoryGetBandwidth_t)(intptr_t)dlsym(handle, "zesMemoryGetBandwidth"); + ZES_MEMORY_GET_BANDWIDTH_PTR = + (zesMemoryGetBandwidth_t)(intptr_t)dlsym(handle, "zesMemoryGetBandwidth"); if (!ZES_MEMORY_GET_BANDWIDTH_PTR && verbose) fprintf(stderr, "Missing symbol zesMemoryGetBandwidth!\n"); - } thapi_sampling_handle_t _sampling_handle = NULL; @@ -279,17 +284,23 @@ static uint32_t **_sampling_memModuleCount = NULL; static uint32_t **_sampling_powerDomainCounts = NULL; static uint32_t **_sampling_engineCounts = NULL; - //////////////////////////////////////////// -#define _ZE_ERROR_MSG(NAME,RES) do {\ - fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ -} while (0) -#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) do {\ - fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ -} while (0) -#define _ERROR_MSG(MSG) {perror((MSG)) do {\ - {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ -} while (0) +#define _ZE_ERROR_MSG(NAME, RES) \ + do { \ + fprintf(stderr, "%s() failed at %d(%s): res=%x\n", (NAME), __LINE__, __FILE__, (RES)); \ + } while (0) +#define _ZE_ERROR_MSG_NOTERMINATE(NAME, RES) \ + do { \ + fprintf(stderr, "%s() error at %d(%s): res=%x\n", (NAME), __LINE__, __FILE__, (RES)); \ + } while (0) +#define _ERROR_MSG(MSG) \ + { \ + perror((MSG)) do { \ + { \ + perror((MSG)); \ + fprintf(stderr, "errno=%d at %d(%s)", errno, __LINE__, __FILE__); \ + } \ + while (0) static void intializeFrequency() { ze_result_t res; @@ -313,7 +324,7 @@ static void intializeFrequency() { } _sampling_hFrequencies[driverIdx][deviceIdx] = (zes_freq_handle_t *)calloc( _sampling_freqDomainCounts[driverIdx][deviceIdx], sizeof(zes_freq_handle_t)); - res 
=ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_freqDomainCounts[driverIdx][deviceIdx], _sampling_hFrequencies[driverIdx][deviceIdx]); if (res != ZE_RESULT_SUCCESS) { @@ -325,7 +336,7 @@ static void intializeFrequency() { domainIdx++) { zes_freq_properties_t freqProps = {0}; freqProps.stype = ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; - res =ZES_FREQUENCY_GET_PROPERTIES_PTR( + res = ZES_FREQUENCY_GET_PROPERTIES_PTR( _sampling_hFrequencies[driverIdx][deviceIdx][domainIdx], &freqProps); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_FREQUENCY_GET_PROPERTIES_PTR", res); @@ -439,7 +450,6 @@ static void intializeEngines() { _sampling_engines_initialized = 1; } - static void intializeFabricPorts() { ze_result_t res; _sampling_hFabricPort = (zes_fabric_port_handle_t ***)calloc(_sampling_driverCount, @@ -491,7 +501,6 @@ static void intializeFabricPorts() { _sampling_fabricPorts_initialized = 1; } - static void intializeMemModules() { ze_result_t res; _sampling_hMemModule = @@ -542,10 +551,9 @@ static void intializeMemModules() { _sampling_memModules_initialized = 1; } - static int initializeHandles() { ze_result_t res; - //find_ze_symbols(handle, NULL); + // find_ze_symbols(handle, NULL); res = ZES_INIT_PTR(0); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_INIT_PTR", res); @@ -656,7 +664,6 @@ static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { } } - static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { if (!_sampling_memModules_initialized) return; @@ -685,7 +692,6 @@ static void readMemModules_dump(uint32_t driverIdx, uint32_t deviceIdx) { } } - static void readEnergy_dump(uint32_t driverIdx, uint32_t deviceIdx) { if (!_sampling_pwr_initialized) return; @@ -726,7 +732,6 @@ static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx) { } } - static void thapi_sampling_energy() { for 
(uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { @@ -748,94 +753,74 @@ static void thapi_sampling_energy() { } } } - +volatile bool running = true; void process_sampling() { - - struct timespec interval; - interval.tv_sec = 0; - interval.tv_nsec = 50000000; // 50ms interval - thapi_sampling_energy(); - _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); - + + struct timespec interval; + interval.tv_sec = 0; + interval.tv_nsec = 50000000; // 50ms interval + thapi_sampling_energy(); + _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); } void cleanup_sampling() { - if (_sampling_handle) { - thapi_unregister_sampling(_sampling_handle); - _sampling_handle = NULL; - } + if (_sampling_handle) { + thapi_unregister_sampling(_sampling_handle); + _sampling_handle = NULL; + } } -// Signal handling loop -int signal_loop(int parent_pid) { - // Initialize signal set and add signals - sigset_t signal_set; - sigemptyset(&signal_set); - sigaddset(&signal_set, RT_SIGNAL_SAMPLING_READY); - sigaddset(&signal_set, RT_SIGNAL_SAMPLING_FINISH); - - // Block signals - sigprocmask(SIG_BLOCK, &signal_set, NULL); - - // Signal the parent process READY - kill(parent_pid, RT_SIGNAL_SAMPLING_READY); - - // Processing loop: until RT_SIGNAL_FINISH - while (1) { - int signum; - sigwait(&signal_set, &signum); - - if (signum == RT_SIGNAL_SAMPLING_FINISH) { - return 0; - } else { - // Example action when READY signal is received - process_sampling(); - printf("Starting \n"); - kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Signal parent - } - } - - // Unreachable - fprintf(stderr, "Exited signal loop unexpectedly.\n"); - return 1; +void signal_handler(int signum) { + if (signum == SIG_SAMPLING_FINISH) { + printf("Received FINISH signal, stopping daemon...\n"); + // running = false; + cleanup_sampling(); + running = false; + } } int 
main(int argc, char **argv) { - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - int parent_pid = atoi(argv[1]); - int verbose = 0; - thapi_sampling_init(); - // Load necessary libraries - void *handle = NULL; - char *s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); - if (s) { - handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); - } else { - handle = dlopen("libze_loader.so", RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); - } - if (!handle) { - fprintf(stderr, "Failure: could not load ze library!\n"); - return 1; - } - - // Initialize daemon - if (getenv("LTTNG_UST_SAMPLING_ENERGY")) { - find_ze_symbols(handle, verbose); - initializeHandles(); - } else { - fprintf(stderr, "Sampling not enabled. Exiting.\n"); - dlclose(handle); - return 0; - } - // Run the signal loop - int ret = signal_loop(parent_pid); - // Cleanup before exiting - cleanup_sampling(); - dlclose(handle); - printf("Daemon exiting with status %d\n", ret); - kill(parent_pid, RT_SIGNAL_SAMPLING_READY); // Notify parent of clean exit - return ret; + fprintf(stderr, "Entering Main.\n"); + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + int parent_pid = atoi(argv[1]); + int verbose = 0; + fprintf(stderr, "Thapi sampling init.\n"); + thapi_sampling_init(); + + // Load necessary libraries + void *handle = NULL; + char *s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); + if (s) { + handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } else { + handle = dlopen("libze_loader.so", RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + } + + if (!handle) { + fprintf(stderr, "Failure: could not load ze library!\n"); + return 1; + } + + // Initialize daemon + find_ze_symbols(handle, verbose); + fprintf(stderr, "Initialize the system.\n"); + initializeHandles(); + fprintf(stderr, "Daemon initialized and entering signal loop.\n"); + // Run the signal loop + signal(SIG_SAMPLING_FINISH, signal_handler); + if (parent_pid > 0) { + kill(parent_pid, SIG_SAMPLING_READY); + 
fprintf(stderr, "Daemon sent READY signal to parent PID %d\n", parent_pid); + } + fprintf(stderr, "Daemon waiting for signals in signal_loop.\n"); + // Clearunningnup before exiting + while (running) { + process_sampling(); // Wait for a signal to be received + } + dlclose(handle); + printf("Daemon exiting \n"); + return 0; } From 8b1d84d714ce6d354c21b2b338e3648d578a97e0 Mon Sep 17 00:00:00 2001 From: sbekele Date: Wed, 13 Nov 2024 20:29:50 +0000 Subject: [PATCH 4/8] updated sampling daemon --- xprof/xprof.rb.in | 93 ++++++++++++++++++++++---------------------- ze/sampling_daemon.c | 91 ++++++++++++++++--------------------------- 2 files changed, 80 insertions(+), 104 deletions(-) diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index bb099e3e..348b223a 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -739,46 +739,52 @@ def gm_rename_folder thapi_trace_dir_root end -SIGRTMIN = 34 -SIG_SAMPLING_READY = SIGRTMIN -SIG_SAMPLING_FINISH = SIGRTMIN + 1 +class SamplingDaemon + SIGRTMIN = 34 + SIG_SAMPLING_READY = SIGRTMIN + SIG_SAMPLING_FINISH = SIGRTMIN + 1 -def start_sampling_daemon(parent_pid) - puts "Starting sampling daemon for parent process PID #{parent_pid}" - sampling_daemon_pid = spawn("/home/sbekele/sampling_daemon/bin/sampling_daemon #{parent_pid}") - Process.detach(sampling_daemon_pid) - sampling_daemon_pid -end + attr_reader :pid -# Wait for the READY signal from the sampling daemon -def wait_for_ready_signal - received_ready = false - Signal.trap(SIG_SAMPLING_READY) do - puts "Received READY signal from sampling daemon" - received_ready = true + def initialize + @pid = nil end - sleep(0.1) while !received_ready # Wait loop until READY signal is received -end -# Send the FINISH signal to terminate the sampling daemon -def send_finish_signal(sampling_daemon_pid) - Process.kill(SIG_SAMPLING_FINISH, sampling_daemon_pid) - puts "Sent FINISH signal to sampling daemon PID #{sampling_daemon_pid}" + def start(parent_pid) + return unless sampling? 
+ + daemon_path = "#{__dir__}/sampling_daemon" + raise "No sampling_daemon binary found at #{daemon_path}" unless File.exist?(daemon_path) + @pid = spawn("#{daemon_path} #{parent_pid}") + Process.detach(@pid) + + wait_for_ready_signal + end + + def finalize + return unless @pid + + Process.kill(SIG_SAMPLING_FINISH, @pid) + LOGGER.debug("Sent FINISH signal to sampling daemon PID #{@pid}") + end + + private + + def wait_for_ready_signal + received_ready = false + Signal.trap(SIG_SAMPLING_READY) do + received_ready = true + end + sleep(0.1) while !received_ready # Wait until READY signal is received + end end - def trace_and_on_node_processing(usr_argv) - def teardown_lttng(syncd, pids, sampling_daemon_pid = nil) - # We need to be sure that all the local ranks are finished + def teardown_lttng(syncd, sampling_daemon, pids) syncd.local_barrier('waiting_for_application_ending') - # Everything from now on, is some local-master processing - # The `Sync_daemon` context will handle the call to the global barrier - # for the early exiting ranks return unless mpi_local_master? - #stop_sampling_daemon(sampling_daemon_pid) if sampling_daemon_pid - # Stop Lttng session and babeltrace daemons lm_lttng_teardown_session if OPTIONS[:archive] LOGGER.debug("Waiting for babeltrace_thapi and dirwatch (#{pids}) to finish") @@ -787,47 +793,40 @@ def trace_and_on_node_processing(usr_argv) XprofExitCode.update(status, "babeltrace_thapi or dirwatch #{pid}") end end - # we can kill the session daemon lm_lttng_kill_sessiond + sampling_daemon.finalize if sampling_daemon end SyncDaemon.open do |syncd| - # Load Tracers and APILoaders Lib + sampling_daemon = nil + sampling_daemon = SamplingDaemon.new if sampling? + sampling_daemon&.start(Process.pid) + backends, h = env_tracers - # All ranks need to set the LLTTNG_HOME env - # so they can have access to the daemon ENV['LTTNG_HOME'] = lttng_home_dir - # Only local master spawn LTTNG daemon and start session pids = if mpi_local_master? 
lm_setup_lttng(backends) lm_babeltrace(backends) if OPTIONS[:archive] - end - + end + syncd.local_barrier('waiting_for_lttng_setup') - if sampling? - sampling_daemon_pid = start_sampling_daemon(Process.pid) - puts "Started sampling daemon with PID #{sampling_daemon_pid}" - wait_for_ready_signal - end # Launch User Command begin XprofExitCode.update(launch_usr_bin(h, usr_argv), usr_argv.join(' ')) rescue Errno::ENOENT - teardown_lttng(syncd, pids) + teardown_lttng(syncd, sampling_daemon, pids) raise end - send_finish_signal(sampling_daemon_pid) if sampling_daemon_pid - teardown_lttng(syncd, pids, sampling_daemon_pid) + + teardown_lttng(syncd, sampling_daemon, pids) return unless mpi_local_master? - # Preprocess trace lm_babeltrace(backends) unless OPTIONS[:archive] lm_move_to_shared end - # Global master rename the unique trace folder to a more - # human friendly name + gm_rename_folder if mpi_master? end diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c index 0b118266..2bdd973d 100644 --- a/ze/sampling_daemon.c +++ b/ze/sampling_daemon.c @@ -1,37 +1,20 @@ #include "sampling_daemon.h" #include "../sampling/thapi_sampling.h" -#include "uthash.h" -#include "utlist.h" -#include "ze.h.include" #include "ze_build.h" -#include "ze_profiling.h" -#include "ze_properties.h" #include "ze_sampling.h" -#include "ze_structs_tracepoints.h" -#include "ze_tracepoints.h" -#include "zel_structs_tracepoints.h" -#include "zel_tracepoints.h" -#include "zes_structs_tracepoints.h" -#include "zes_tracepoints.h" -#include "zet_structs_tracepoints.h" -#include "zet_tracepoints.h" -#include "zex_structs_tracepoints.h" -#include "zex_tracepoints.h" #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include +#include +#include +#include -#define SIG_SAMPLING_READY SIGRTMIN +#define SIG_SAMPLING_READY (SIGRTMIN) #define SIG_SAMPLING_FINISH (SIGRTMIN + 1) #define ZES_INIT_PTR zesInit_ptr @@ -261,7 +244,7 @@ static void 
find_ze_symbols(void *handle, int verbose) { if (!ZES_MEMORY_GET_BANDWIDTH_PTR && verbose) fprintf(stderr, "Missing symbol zesMemoryGetBandwidth!\n"); } - +volatile bool running = true; thapi_sampling_handle_t _sampling_handle = NULL; static int _sampling_freq_initialized = 0; static int _sampling_fabricPorts_initialized = 0; @@ -285,22 +268,21 @@ static uint32_t **_sampling_powerDomainCounts = NULL; static uint32_t **_sampling_engineCounts = NULL; //////////////////////////////////////////// -#define _ZE_ERROR_MSG(NAME, RES) \ - do { \ - fprintf(stderr, "%s() failed at %d(%s): res=%x\n", (NAME), __LINE__, __FILE__, (RES)); \ - } while (0) -#define _ZE_ERROR_MSG_NOTERMINATE(NAME, RES) \ - do { \ - fprintf(stderr, "%s() error at %d(%s): res=%x\n", (NAME), __LINE__, __FILE__, (RES)); \ - } while (0) -#define _ERROR_MSG(MSG) \ - { \ - perror((MSG)) do { \ - { \ - perror((MSG)); \ - fprintf(stderr, "errno=%d at %d(%s)", errno, __LINE__, __FILE__); \ - } \ - while (0) +#define _ZE_ERROR_MSG(NAME,RES) do {\ + fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) do {\ + fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ERROR_MSG(MSG) do {\ +perror((MSG));fprintf(stderr, "errno=%d at %d(%s)\n", errno, __LINE__, __FILE__);\ +} while (0) +#define _USAGE_MSG(MSG, ARGV0) do {\ + fprintf(stderr, "Usage: %s %s\n", (ARGV0), (MSG));\ +} while (0) +#define _DL_ERROR_MSG() do {\ + fprintf(stderr, "dlopen error: %s at %d(%s)\n", dlerror(), __LINE__, __FILE__);\ +} while(0) static void intializeFrequency() { ze_result_t res; @@ -753,7 +735,6 @@ static void thapi_sampling_energy() { } } } -volatile bool running = true; void process_sampling() { struct timespec interval; @@ -771,8 +752,6 @@ void cleanup_sampling() { void signal_handler(int signum) { if (signum == SIG_SAMPLING_FINISH) { - printf("Received FINISH signal, stopping daemon...\n"); - 
// running = false; cleanup_sampling(); running = false; } @@ -780,15 +759,17 @@ void signal_handler(int signum) { int main(int argc, char **argv) { - fprintf(stderr, "Entering Main.\n"); if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); + _USAGE_MSG("", argv[0]); return 1; } int parent_pid = atoi(argv[1]); + if (parent_pid <= 0) { + _ERROR_MSG("Invalid or missing parent PID. A positive integer is required."); + return 1; + } int verbose = 0; - fprintf(stderr, "Thapi sampling init.\n"); - thapi_sampling_init(); + thapi_sampling_init();// Initialize sampling // Load necessary libraries void *handle = NULL; @@ -800,27 +781,23 @@ int main(int argc, char **argv) { } if (!handle) { - fprintf(stderr, "Failure: could not load ze library!\n"); + _DL_ERROR_MSG(); return 1; } - - // Initialize daemon + //Find zes symbols find_ze_symbols(handle, verbose); - fprintf(stderr, "Initialize the system.\n"); + //Initialize device and telemetry handles initializeHandles(); - fprintf(stderr, "Daemon initialized and entering signal loop.\n"); // Run the signal loop signal(SIG_SAMPLING_FINISH, signal_handler); - if (parent_pid > 0) { - kill(parent_pid, SIG_SAMPLING_READY); - fprintf(stderr, "Daemon sent READY signal to parent PID %d\n", parent_pid); + + if (kill(parent_pid, SIG_SAMPLING_READY) != 0) { + _ERROR_MSG("Failed to send READY signal to parent"); } - fprintf(stderr, "Daemon waiting for signals in signal_loop.\n"); - // Clearunningnup before exiting + // Process_sampling loop until SIG_SAMPLING_FINISH signal while (running) { - process_sampling(); // Wait for a signal to be received + process_sampling(); } dlclose(handle); - printf("Daemon exiting \n"); return 0; } From feaf2514d98ab60b8d6643965d85b3a3cbd0380d Mon Sep 17 00:00:00 2001 From: sbekele Date: Wed, 13 Nov 2024 22:12:42 +0000 Subject: [PATCH 5/8] comments --- xprof/xprof.rb.in | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index 
348b223a..d49496ec 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -7,9 +7,6 @@ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new(THAPI_RUBY_MINIMAL_VERSION) exit(1) end -# Define signals for sampling daemon - - # We Cannot use "@ .. @" for libdir, bindir, and dataroodir # as they will appear as bash "${exec_prefix}/lib" @@ -779,12 +776,18 @@ class SamplingDaemon end end +# Start, Stop lttng, amd do the on-node analsysis def trace_and_on_node_processing(usr_argv) def teardown_lttng(syncd, sampling_daemon, pids) + # We need to be sure that all the local ranks are finished syncd.local_barrier('waiting_for_application_ending') + # Everything from now on, is some local-master processing + # The `Sync_daemon` context will handle the call to the global barrier + # for the early exiting ranks return unless mpi_local_master? + # Stop Lttng session and babeltrace daemons lm_lttng_teardown_session if OPTIONS[:archive] LOGGER.debug("Waiting for babeltrace_thapi and dirwatch (#{pids}) to finish") @@ -802,8 +805,11 @@ def trace_and_on_node_processing(usr_argv) sampling_daemon = SamplingDaemon.new if sampling? sampling_daemon&.start(Process.pid) + # Load Tracers and APILoaders Lib backends, h = env_tracers + # All ranks need to set the LLTTNG_HOME env + # so they can have access to the daemon ENV['LTTNG_HOME'] = lttng_home_dir pids = if mpi_local_master? lm_setup_lttng(backends) @@ -826,7 +832,8 @@ def trace_and_on_node_processing(usr_argv) lm_babeltrace(backends) unless OPTIONS[:archive] lm_move_to_shared end - + # Global master rename the unique trace folder to a more + # human friendly name gm_rename_folder if mpi_master? 
end From 5b2246e358c8d8e27264e5fb77206f6d5e2a1118 Mon Sep 17 00:00:00 2001 From: sbekele Date: Thu, 14 Nov 2024 16:21:23 +0000 Subject: [PATCH 6/8] minor changes --- xprof/xprof.rb.in | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index d49496ec..2bce46c3 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -7,7 +7,6 @@ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new(THAPI_RUBY_MINIMAL_VERSION) exit(1) end - # We Cannot use "@ .. @" for libdir, bindir, and dataroodir # as they will appear as bash "${exec_prefix}/lib" # So for now we will rely on them having the default value, @@ -240,9 +239,6 @@ def lttng_home_dir File.join('/', 'tmp', "lttng_home--#{mpi_job_id}") end - - - def thapi_trace_dir_root raise unless mpi_master? @@ -796,21 +792,20 @@ def trace_and_on_node_processing(usr_argv) XprofExitCode.update(status, "babeltrace_thapi or dirwatch #{pid}") end end + # we can kill the session daemon lm_lttng_kill_sessiond sampling_daemon.finalize if sampling_daemon end SyncDaemon.open do |syncd| sampling_daemon = nil - sampling_daemon = SamplingDaemon.new if sampling? - sampling_daemon&.start(Process.pid) - # Load Tracers and APILoaders Lib backends, h = env_tracers # All ranks need to set the LLTTNG_HOME env # so they can have access to the daemon ENV['LTTNG_HOME'] = lttng_home_dir + # Only local master spawn LTTNG daemon and start session pids = if mpi_local_master? lm_setup_lttng(backends) lm_babeltrace(backends) if OPTIONS[:archive] @@ -818,6 +813,11 @@ def trace_and_on_node_processing(usr_argv) syncd.local_barrier('waiting_for_lttng_setup') + if sampling? + sampling_daemon = SamplingDaemon.new + sampling_daemon&.start(Process.pid) + end + # Launch User Command begin XprofExitCode.update(launch_usr_bin(h, usr_argv), usr_argv.join(' ')) @@ -829,6 +829,7 @@ def trace_and_on_node_processing(usr_argv) teardown_lttng(syncd, sampling_daemon, pids) return unless mpi_local_master? 
+ # Preprocess trace lm_babeltrace(backends) unless OPTIONS[:archive] lm_move_to_shared end From 23bd2046b95a9770b9c28ce5ba53260059ad2811 Mon Sep 17 00:00:00 2001 From: sbekele Date: Fri, 15 Nov 2024 14:55:03 +0000 Subject: [PATCH 7/8] PR comments addressed --- xprof/xprof.rb.in | 12 +++++----- ze/Makefile.am | 24 +++----------------- ze/sampling_daemon.c | 41 +++++++++++----------------------- ze/sampling_daemon.h | 7 ------ ze/tracer_ze_helpers.include.c | 9 -------- 5 files changed, 23 insertions(+), 70 deletions(-) delete mode 100644 ze/sampling_daemon.h diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index 2bce46c3..161ab2f3 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -748,6 +748,7 @@ class SamplingDaemon daemon_path = "#{__dir__}/sampling_daemon" raise "No sampling_daemon binary found at #{daemon_path}" unless File.exist?(daemon_path) + @pid = spawn("#{daemon_path} #{parent_pid}") Process.detach(@pid) @@ -758,7 +759,8 @@ class SamplingDaemon return unless @pid Process.kill(SIG_SAMPLING_FINISH, @pid) - LOGGER.debug("Sent FINISH signal to sampling daemon PID #{@pid}") + + wait_for_ready_signal end private @@ -768,7 +770,7 @@ class SamplingDaemon Signal.trap(SIG_SAMPLING_READY) do received_ready = true end - sleep(0.1) while !received_ready # Wait until READY signal is received + sleep(0.1) until received_ready # Wait until READY signal is received end end @@ -794,7 +796,7 @@ def trace_and_on_node_processing(usr_argv) end # we can kill the session daemon lm_lttng_kill_sessiond - sampling_daemon.finalize if sampling_daemon + sampling_daemon&.finalize end SyncDaemon.open do |syncd| @@ -814,8 +816,8 @@ def trace_and_on_node_processing(usr_argv) syncd.local_barrier('waiting_for_lttng_setup') if sampling? 
- sampling_daemon = SamplingDaemon.new - sampling_daemon&.start(Process.pid) + sampling_daemon = SamplingDaemon.new + sampling_daemon&.start(Process.pid) end # Launch User Command diff --git a/ze/Makefile.am b/ze/Makefile.am index adf9507d..a38f6ad1 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -140,8 +140,6 @@ CLEANFILES += \ $(ZE_STATIC_PROBES_INCL) \ $(ZE_STATIC_PROBES_SRC) - - EXTRA_DIST += \ gen_probe_base.rb \ $(ZE_GEN_TRACEPOINTS) \ @@ -153,22 +151,19 @@ BUILT_SOURCES = \ bin_PROGRAMS = sampling_daemon -sampling_daemon_SOURCES = \ - sampling_daemon.c \ - sampling_daemon.h +sampling_daemon_SOURCES = sampling_daemon.c nodist_sampling_daemon_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) -sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/ze/include -I./ +sampling_daemon_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(top_srcdir)/ze/include -I./ sampling_daemon_CFLAGS = -Wall -Wextra $(WERROR) $(LTTNG_UST_CFLAGS) sampling_daemon_LDADD = libzetracepoints.la -ldl -lpthread $(LTTNG_UST_LIBS) ../sampling/libThapiSampling.la tracer_ze.c: $(srcdir)/gen_ze.rb $(srcdir)/tracer_ze_helpers.include.c $(srcdir)/ze.h.include $(ZE_MODEL) $(ZE_PROBES_INCL) $(ZE_STATIC_PROBES_INCL) SRC_DIR=$(srcdir) $(RUBY) $< > $@ - EXTRA_DIST += \ gen_ze.rb \ tracer_ze_helpers.include.c @@ -196,26 +191,13 @@ nodist_libze_loader_la_SOURCES = \ $(ZE_PROBES_INCL) \ $(ZE_STATIC_PROBES_INCL) \ tracer_ze.c - + libze_loader_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/include -I$(top_srcdir)/sampling -I$(srcdir)/include -I$(top_srcdir)/utils -I./ libze_loader_la_CFLAGS = -Wall -Wextra $(WERROR) $(LIBFFI_CFLAGS) $(LTTNG_UST_CFLAGS) libze_loader_la_LDFLAGS = $(LTTNG_UST_LIBS) -ldl -lpthread $(LIBFFI_LIBS) ../sampling/libThapiSampling.la libze_loader_la_LDFLAGS += -version-info 1:0:0 libze_loader_la_LIBADD = libzetracepoints.la -install-exec-hook: - $(MKDIR_P) $(DESTDIR)$(pkglibdir)/ze 
- $(LN_S) -f $(DESTDIR)$(libdir)/libze_loader.so.1.0.0 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 - $(LN_S) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so - $(MKDIR_P) $(DESTDIR)$(pkglibdir)/bt2 - $(LN) -f $(DESTDIR)$(libdir)/libZEInterval.so $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so - -uninstall-hook: - $(RM) -f $(DESTDIR)$(pkglibdir)/ze/libze_loader.so.1 $(DESTDIR)$(pkglibdir)/ze/libze_loader.so - -rmdir $(DESTDIR)$(pkglibdir)/ze - $(RM) -f $(DESTDIR)$(pkglibdir)/bt2/libZEInterval.so - -rmdir $(DESTDIR)$(pkglibdir)/bt2 - tmplibdir = $(libdir)/tmp install-data-hook: diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c index 2bdd973d..987edf4c 100644 --- a/ze/sampling_daemon.c +++ b/ze/sampling_daemon.c @@ -1,5 +1,4 @@ -#include "sampling_daemon.h" -#include "../sampling/thapi_sampling.h" +#include "thapi_sampling.h" #include "ze_build.h" #include "ze_sampling.h" #include @@ -18,45 +17,25 @@ #define SIG_SAMPLING_FINISH (SIGRTMIN + 1) #define ZES_INIT_PTR zesInit_ptr - #define ZES_DRIVER_GET_PTR zesDriverGet_ptr - #define ZES_DEVICE_GET_PTR zesDeviceGet_ptr - #define ZES_DEVICE_GET_PROPERTIES_PTR zesDeviceGetProperties_ptr - #define ZES_DEVICE_ENUM_POWER_DOMAINS_PTR zesDeviceEnumPowerDomains_ptr - #define ZES_POWER_GET_PROPERTIES_PTR zesPowerGetProperties_ptr - #define ZES_POWER_GET_ENERGY_COUNTER_PTR zesPowerGetEnergyCounter_ptr - #define ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR zesDeviceEnumFrequencyDomains_ptr - #define ZES_FREQUENCY_GET_PROPERTIES_PTR zesFrequencyGetProperties_ptr - #define ZES_FREQUENCY_GET_STATE_PTR zesFrequencyGetState_ptr - #define ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR zesDeviceEnumEngineGroups_ptr - #define ZES_ENGINE_GET_PROPERTIES_PTR zesEngineGetProperties_ptr - #define ZES_ENGINE_GET_ACTIVITY_PTR zesEngineGetActivity_ptr - #define ZES_DEVICE_ENUM_FABRIC_PORTS_PTR zesDeviceEnumFabricPorts_ptr - #define ZES_FABRIC_PORT_GET_PROPERTIES_PTR zesFabricPortGetProperties_ptr - #define 
ZES_FABRIC_PORT_GET_STATE_PTR zesFabricPortGetState_ptr - #define ZES_FABRIC_PORT_GET_THROUGHPUT_PTR zesFabricPortGetThroughput_ptr - #define ZES_DEVICE_ENUM_MEMORY_MODULES_PTR zesDeviceEnumMemoryModules_ptr - #define ZES_MEMORY_GET_PROPERTIES_PTR zesMemoryGetProperties_ptr - #define ZES_MEMORY_GET_STATE_PTR zesMemoryGetState_ptr - #define ZES_MEMORY_GET_BANDWIDTH_PTR zesMemoryGetBandwidth_ptr typedef ze_result_t (*zesInit_t)(zes_init_flags_t flags); @@ -735,14 +714,15 @@ static void thapi_sampling_energy() { } } } -void process_sampling() { +void process_sampling() { struct timespec interval; interval.tv_sec = 0; interval.tv_nsec = 50000000; // 50ms interval thapi_sampling_energy(); _sampling_handle = thapi_register_sampling(&thapi_sampling_energy, &interval); } + void cleanup_sampling() { if (_sampling_handle) { thapi_unregister_sampling(_sampling_handle); @@ -758,21 +738,23 @@ void signal_handler(int signum) { } int main(int argc, char **argv) { - + + int parent_pid = 0; + int verbose = 0; + void *handle = NULL; if (argc < 2) { _USAGE_MSG("", argv[0]); return 1; } - int parent_pid = atoi(argv[1]); + parent_pid = atoi(argv[1]); if (parent_pid <= 0) { - _ERROR_MSG("Invalid or missing parent PID. 
A positive integer is required."); + _ERROR_MSG("Invalid or missing parent PID."); return 1; } - int verbose = 0; + thapi_sampling_init();// Initialize sampling // Load necessary libraries - void *handle = NULL; char *s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); if (s) { handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); @@ -798,6 +780,9 @@ int main(int argc, char **argv) { while (running) { process_sampling(); } + if (parent_pid != 0) + kill(parent_pid, SIG_SAMPLING_READY); + dlclose(handle); return 0; } diff --git a/ze/sampling_daemon.h b/ze/sampling_daemon.h deleted file mode 100644 index 4f4cd6ed..00000000 --- a/ze/sampling_daemon.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef SAMPLING_DAEMON_H -#define SAMPLING_DAEMON_H - -void initialize_sampling(); -void cleanup_sampling(); - -#endif // SAMPLING_DAEMON_H \ No newline at end of file diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index ebdc7369..7e5f4a25 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -1,6 +1,3 @@ -//#include "thapi_sampling.h" -//#include "sampling_daemon.h" - #ifdef THAPI_DEBUG #define TAHPI_LOG stderr #define THAPI_DBGLOG(fmt, ...) 
\ @@ -41,7 +38,6 @@ static int _do_cleanup = 0; static int _do_chained_structs = 0; static int _do_paranoid_drift = 0; static int _do_paranoid_memory_location = 0; -//thapi_sampling_handle_t _sampling_handle = NULL; pthread_mutex_t ze_closures_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -788,15 +784,10 @@ static inline void _dump_memory_info(ze_command_list_handle_t hCommandList, cons {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ } while (0) - - static void _load_tracer(void) { char *s = NULL; void *handle = NULL; int verbose = 0; - //struct timespec interval; - //thapi_sampling_init(); - s = getenv("LTTNG_UST_ZE_LIBZE_LOADER"); if (s) handle = dlopen(s, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); From 0356e3c21f16a946f9abf94dd51a886b3df15174 Mon Sep 17 00:00:00 2001 From: sbekele Date: Tue, 26 Nov 2024 20:02:15 +0000 Subject: [PATCH 8/8] ze make updated --- xprof/xprof.rb.in | 4 ++-- ze/Makefile.am | 6 +++++- ze/sampling_daemon.c | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xprof/xprof.rb.in b/xprof/xprof.rb.in index 161ab2f3..b5d805f7 100755 --- a/xprof/xprof.rb.in +++ b/xprof/xprof.rb.in @@ -813,13 +813,13 @@ def trace_and_on_node_processing(usr_argv) lm_babeltrace(backends) if OPTIONS[:archive] end - syncd.local_barrier('waiting_for_lttng_setup') - if sampling? 
sampling_daemon = SamplingDaemon.new sampling_daemon&.start(Process.pid) end + syncd.local_barrier('waiting_for_lttng_setup') + # Launch User Command begin XprofExitCode.update(launch_usr_bin(h, usr_argv), usr_argv.join(' ')) diff --git a/ze/Makefile.am b/ze/Makefile.am index a38f6ad1..65da9ed1 100644 --- a/ze/Makefile.am +++ b/ze/Makefile.am @@ -185,7 +185,11 @@ libzetracepoints_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(top_srcdir)/utils/inclu libzetracepoints_la_CFLAGS = -fPIC -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Wno-sign-compare $(WERROR) $(LTTNG_UST_CFLAGS) libzetracepoints_la_LDFLAGS = $(LTTNG_UST_LIBS) -lib_LTLIBRARIES = libze_loader.la libZEInterval.la +zedir = $(pkglibdir)/ze +ze_LTLIBRARIES = libze_loader.la + +bt2dir = $(pkglibdir)/bt2 +bt2_LTLIBRARIES = libZEInterval.la nodist_libze_loader_la_SOURCES = \ $(ZE_PROBES_INCL) \ diff --git a/ze/sampling_daemon.c b/ze/sampling_daemon.c index 987edf4c..db87a0a2 100644 --- a/ze/sampling_daemon.c +++ b/ze/sampling_daemon.c @@ -742,10 +742,12 @@ int main(int argc, char **argv) { int parent_pid = 0; int verbose = 0; void *handle = NULL; + if (argc < 2) { _USAGE_MSG("", argv[0]); return 1; } + parent_pid = atoi(argv[1]); if (parent_pid <= 0) { _ERROR_MSG("Invalid or missing parent PID.");