diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 88a00444..31b0f248 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -45,7 +45,8 @@ struct Details { using timeline_dispatch_t = struct timeline_dispatch_s; using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t, uint64_t, uint32_t, uint64_t, - uint32_t, std::optional); + uint32_t, std::optional
); + static perfetto_uuid_t gen_perfetto_uuid() { // Start at one, Look like UUID 0 is special static std::atomic uuid{1}; @@ -193,62 +194,6 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin perfetto_uuid_t track_uuid; track_uuid = uuid_getter(dispatch, hostname, process_id, did, deviceIdx, tHandle, subDevice, options); - auto *packet = dispatch->trace.add_packet(); - packet->set_timestamp(0); - packet->set_trusted_packet_sequence_id(TRUSTED_PACKED_SEQUENCE_ID); - auto *track_descriptor = packet->mutable_track_descriptor(); - track_descriptor->set_uuid(hp_dev_uuid); - track_descriptor->set_parent_uuid(hp_uuid); - std::ostringstream oss; - if (details) { - oss << track_name << " | SD " << domain; - oss << " | " << details->fabricId << "<->"<< details->remotePortId << " | " <<(details->RxTx ? " TX" : " RX"); - } else { - oss << track_name << " | Domain " << domain; - } - track_descriptor->set_name(oss.str()); - auto *counter_descriptor = track_descriptor->mutable_counter(); - counter_descriptor->set_unit_multiplier(unit_multiplier); - return hp_dev_uuid; -} - -static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uintptr_t did, uint32_t subDevice, std::optional options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, "CopyEngine (%)", hostname, process_id, did, subDevice, 100); -} - -static perfetto_uuid_t get_fpThroughput_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uintptr_t did, uint32_t subDevice, std::optional options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, "FabricT", hostname, process_id, did, subDevice, 100, options, &dispatch->hp_dfsdev2fptracks); -} - -static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uintptr_t did, uint32_t subDevice, std::optional options) { - // Extra space to maintain track sequence in the timeline - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " Power", hostname, process_id, did, subDevice, 100); -} - -static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uintptr_t did, uint32_t subDevice, std::optional options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, " Ferquency", hostname, process_id, did, subDevice, 100); -} - -static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uintptr_t did, uint32_t subDevice, std::optional options ) { - return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpetracks, "ComputeEngine (%)", hostname, process_id, did, subDevice, 100); -} - -static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, - uint64_t thread_id, uintptr_t did, uint32_t subDevice, - uint64_t timestamp, float value, uuid_getter_t uuid_getter, const std::string &eventName, - std::optional options = std::nullopt) { - perfetto_uuid_t track_uuid; - if (options.has_value()) { - track_uuid = uuid_getter(dispatch, hostname, process_id, did, subDevice, options); - } else { - track_uuid = uuid_getter(dispatch, hostname, process_id, did, subDevice, std::nullopt); - } - auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(TRUSTED_PACKED_SEQUENCE_ID); packet->set_timestamp(timestamp); diff --git a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 1fb1111e..23ea66ee 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -821,7 +821,7 @@ static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *us double rxThroughput = static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / time_diff; double txThroughput = static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / time_diff; DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); - if (rxThroughput != 0) + if (rxThroughput != 0 || txThroughput != 0) btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, subDevice, fabricId, remotePortId, rxThroughput, txThroughput, @@ -964,7 +964,7 @@ static void lttng_ust_ze_sampling_deviceProperties_callback(void *btx_handle, vo auto *data = static_cast(usr_data); data->sampling_device_property[{hostname, vpid, hDevice}] = {*pDeviceProperties_val, deviceIdx}; } -/* + static void lttng_ust_ze_sampling_subDeviceProperties_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, zes_device_handle_t hDevice, ze_device_handle_t hSubDevice, size_t _pSubDeviceProperties_val_length, @@ -972,200 +972,26 @@ static void lttng_ust_ze_sampling_subDeviceProperties_callback(void *btx_handle, auto *data = static_cast(usr_data); data->sampling_sub_device_property[{hostname, vpid, (ze_device_handle_t)hSubDevice}] = *pSubDeviceProperties_val; } -*/ + static void lttng_ust_ze_sampling_fabricPortProperties_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, - zes_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort, + ze_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort, size_t _pFabricPortProperties_val_length, zes_fabric_port_properties_t *pFabricPortProperties_val) { auto *data = static_cast(usr_data); - data->fabricPort_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val; + data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val; } static void lttng_ust_ze_sampling_memoryProperties_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, - zes_device_handle_t hDevice, zes_mem_handle_t hMemModule, + ze_device_handle_t hDevice, zes_mem_handle_t hMemModule, size_t _pMemModuleProperties_val_length, zes_mem_properties_t *pMemModuleProperties_val) { auto *data = static_cast(usr_data); - data->memModule_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_mem_handle_t)hMemModule}] = *pMemModuleProperties_val; -} - - -static void lttng_ust_ze_sampling_powerProperties_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, uint64_t vtid, - zes_device_handle_t hDevice, zes_pwr_handle_t hPower, - size_t _pPowerProperties_val_length, - zes_power_properties_t *pPowerProperties_val) { - auto *data = static_cast(usr_data); - data->power_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_pwr_handle_t)hPower}] = *pPowerProperties_val; -} - -static void lttng_ust_ze_sampling_freqProperties_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, uint64_t vtid, - zes_device_handle_t hDevice, zes_freq_handle_t hFrequency, - size_t _pfreqProperties_val_length, - zes_freq_properties_t *pFreqProperties_val) { - auto *data = static_cast(usr_data); - data->frequency_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_freq_handle_t)hFrequency}] = *pFreqProperties_val; -} - -static void lttng_ust_ze_sampling_engineProperties_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, uint64_t vtid, - zes_device_handle_t hDevice, zes_engine_handle_t hEngine, - size_t _pEngineProperties_val_length, - zes_engine_properties_t *pEngineProperties_val) { - auto *data = static_cast(usr_data); - data->engine_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_engine_handle_t)hEngine}] = *pEngineProperties_val; -} - -static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, - uint64_t vtid, ze_device_handle_t hDevice, - zes_fabric_port_handle_t hFabricPort, - size_t _pFabricPortState_val_length, - zes_fabric_port_state_t *pFabricPortState_val, - size_t _pFabricPortThroughput_val_length, - zes_fabric_port_throughput_t *pFabricPortThroughput_val) { - auto *data = static_cast(usr_data); - const auto it0 = data->fabricPort_property.find({hostname, vpid, hDevice, hFabricPort}); - if (it0 != data->fabricPort_property.cend()) { - // Get fabricPort properties: subdevice ID, fabeicId ... - auto subDevice = it0->second.subdeviceId; - auto fabricId = it0->second.portId.fabricId; - auto remotePortId = pFabricPortState_val->remotePortId.fabricId; - // Current Speed - double rxSpeed = static_cast(pFabricPortState_val->rxSpeed.bitRate * pFabricPortState_val->rxSpeed.width)/8.0; - double txSpeed = static_cast(pFabricPortState_val->txSpeed.bitRate * pFabricPortState_val->txSpeed.width)/8.0; - - // Insert the current throughput data with timestamp - auto [it, inserted] = data->device_fabricPort_ref.insert( - {{hostname, vpid, hDevice, hFabricPort, subDevice}, {*pFabricPortThroughput_val, ts}}); - if (inserted) - return; - - // Previous throughput data - auto &[prev_throughput, prev_ts] = it->second; - - /* Per doc: When taking the delta, the difference between timestamp samples - * could be 0, if the frequency of sampling the snapshots is higher than the - * frequency of the timestamp update. */ - if (pFabricPortThroughput_val->timestamp == prev_throughput.timestamp) - return; - - double time_diff = static_cast(pFabricPortThroughput_val->timestamp - prev_throughput.timestamp); - // Calculate the RX and TX throughput - double rxThroughput = static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / double(time_diff); - double txThroughput = static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / double(time_diff); - - btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice,(uint64_t)hFabricPort, subDevice, - fabricId, remotePortId, rxThroughput, txThroughput, - rxSpeed, txSpeed); - // Update the stored values - it->second = {*pFabricPortThroughput_val, ts}; - } else { - std::cerr << "Fabricport property not found!" << std::endl; - } -} - -static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, - uint64_t vtid, ze_device_handle_t hDevice, - zes_engine_handle_t hEngine, - size_t _pEngineStats_val_length, - zes_engine_stats_t *pEngineStats_val) { - auto *data = static_cast(usr_data); - const auto it0 = data->engine_property.find({hostname, vpid, hDevice, hEngine}); - if (it0 != data->engine_property.cend()) { - const auto& engineProps = it0->second; - uint32_t subDevice = (engineProps.onSubdevice) ? engineProps.subdeviceId : 0; - - if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL || engineProps.type == ZES_ENGINE_GROUP_COPY_ALL) { - auto [it, inserted] = data->device_engines_ref.insert( - {{hostname, vpid, hDevice, hEngine, subDevice}, {*pEngineStats_val, ts}}); - if (inserted) - return; - auto &[prev_engineStats, prev_ts] = it->second; - if (pEngineStats_val->timestamp == prev_engineStats.timestamp) - return; - - double time_diff = static_cast(pEngineStats_val->timestamp - prev_engineStats.timestamp); - double activeTime = static_cast(pEngineStats_val->activeTime - prev_engineStats.activeTime) / time_diff; - - if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) { - btx_push_message_lttng_computeEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice, subDevice, activeTime); - } else { - btx_push_message_lttng_copyEU(btx_handle, hostname,0, 0, prev_ts, BACKEND_ZE, - (uint64_t)hDevice, subDevice, activeTime); - } - it->second = {*pEngineStats_val, ts}; - } - } else { - std::cerr << "Engine property not found for device: " << hDevice << std::endl; - } -} - -static void lttng_ust_ze_sampling_gpu_energy_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, - uint64_t vtid, ze_device_handle_t hDevice, - zes_pwr_handle_t hPower, - size_t _pEnergyCounter_val_length, - zes_power_energy_counter_t *pEnergyCounter_val) { - auto *data = static_cast(usr_data); - const auto it0 = data->power_property.find({hostname, vpid, hDevice, hPower}); - if (it0 != data->power_property.cend()) { - const auto& powerProps = it0->second; - uint32_t domain = (powerProps.onSubdevice) ? powerProps.subdeviceId + 1 : 0; - auto [it, inserted] = data->device_energy_ref.insert( - {{hostname, vpid, hDevice, hPower, domain}, {*pEnergyCounter_val, ts}}); - if (inserted) - return; - - auto &[prev_EnergyCounter, prev_ts] = it->second; - if (pEnergyCounter_val->timestamp == prev_EnergyCounter.timestamp) - return; - - double time_diff = static_cast(pEnergyCounter_val->timestamp - prev_EnergyCounter.timestamp); - double power = static_cast(pEnergyCounter_val->energy - prev_EnergyCounter.energy) / time_diff; - btx_push_message_lttng_power(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, (uint64_t)hDevice, - (thapi_domain_idx)domain, power); - it->second = {*pEnergyCounter_val, ts}; - } else { - std::cerr << "Power property not found for device: " << hDevice << std::endl; - } + data->memModule_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_mem_handle_t)hMemModule}] = *pMemModuleProperties_val; } -static void lttng_ust_ze_sampling_gpu_frequency_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, - uint64_t vtid, ze_device_handle_t hDevice, - zes_freq_handle_t hFrequency, - size_t _pFreqState_val_length, - zes_freq_state_t *pFreqState_val) { - auto *data = static_cast(usr_data); - const auto it0 = data->frequency_property.find({hostname, vpid, hDevice, hFrequency}); - if (it0 != data->frequency_property.cend()) { - const auto& freqProps = it0->second; - uint32_t domain = (freqProps.onSubdevice) ? freqProps.subdeviceId : 0; - - btx_push_message_lttng_frequency(btx_handle, hostname, 0, 0, ts, BACKEND_ZE, (uint64_t)hDevice, - (thapi_domain_idx)domain, pFreqState_val->actual); - }else { - std::cerr << "Frequency property not found for device: " << hDevice << std::endl; - } -} -// Properties -static void lttng_ust_ze_sampling_fabricPortProperties_callback(void *btx_handle, void *usr_data, int64_t ts, - const char *hostname, int64_t vpid, uint64_t vtid, - ze_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort, - size_t _pFabricPortProperties_val_length, - zes_fabric_port_properties_t *pFabricPortProperties_val) { - auto *data = static_cast(usr_data); - data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val; -} - static void lttng_ust_ze_sampling_powerProperties_callback(void *btx_handle, void *usr_data, int64_t ts, const char *hostname, int64_t vpid, uint64_t vtid, ze_device_handle_t hDevice, zes_pwr_handle_t hPower, @@ -1295,8 +1121,8 @@ void btx_register_usr_callbacks(void *btx_handle) { //Properties btx_register_callbacks_lttng_ust_ze_sampling_deviceProperties( btx_handle, <tng_ust_ze_sampling_deviceProperties_callback); - // btx_register_callbacks_lttng_ust_ze_sampling_subDeviceProperties( - // btx_handle, <tng_ust_ze_sampling_subDeviceProperties_callback); + btx_register_callbacks_lttng_ust_ze_sampling_subDeviceProperties( + btx_handle, <tng_ust_ze_sampling_subDeviceProperties_callback); btx_register_callbacks_lttng_ust_ze_sampling_fabricPortProperties( btx_handle, <tng_ust_ze_sampling_fabricPortProperties_callback); btx_register_callbacks_lttng_ust_ze_sampling_powerProperties( diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 2aeac60c..078fcc25 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -797,7 +797,7 @@ static int _sampling_engines_initialized = 0; // Static handles to stay throughout the execution static zes_driver_handle_t* _sampling_hDrivers = NULL; static zes_device_handle_t** _sampling_hDevices = NULL; -static zes_device_handle_t*** _sampling_hSubDevices = NULL; +static ze_device_handle_t*** _sampling_hSubDevices = NULL; static zes_freq_handle_t*** _sampling_hFrequencies = NULL; static zes_pwr_handle_t*** _sampling_hPowers = NULL; static zes_engine_handle_t*** _sampling_engineHandles = NULL; @@ -821,7 +821,6 @@ static void intializeFrequency() { _sampling_hFrequencies[driverIdx] = (zes_freq_handle_t**) calloc(_sampling_deviceCount[driverIdx], sizeof(zes_freq_handle_t*)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { // Get frequency domains for each device - _sampling_freqDomainCounts[driverIdx][deviceIdx] = 0; res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_freqDomainCounts[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); @@ -860,7 +859,6 @@ static void intializePower() { _sampling_powerDomainCounts[driverIdx] = (uint32_t*) calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { // Get power domains for each device - _sampling_powerDomainCounts[driverIdx][deviceIdx] = 0; res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_powerDomainCounts[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); @@ -900,7 +898,6 @@ static void intializeEngines() { _sampling_engineCounts[driverIdx] = (uint32_t*) calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { // Get engine counts for each device - _sampling_engineCounts[driverIdx][deviceIdx] = 0; res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_engineCounts[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[driverIdx][deviceIdx] == 0) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); @@ -938,7 +935,6 @@ static void intializeFabricPorts() { _sampling_hFabricPort[driverIdx] = (zes_fabric_port_handle_t**) calloc(_sampling_deviceCount[driverIdx], sizeof(zes_fabric_port_handle_t*)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { // Get fabric ports for each device - _sampling_fabricPortCount[driverIdx][deviceIdx]=0; res = ZES_DEVICE_ENUM_FABRIC_PORTS_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_fabricPortCount[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FABRIC_PORTS_PTR", res); @@ -978,7 +974,6 @@ static void intializeMemModules() { _sampling_hMemModule[driverIdx] = (zes_mem_handle_t**) calloc(_sampling_deviceCount[driverIdx], sizeof(zes_mem_handle_t*)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { // Get fabric ports for each device - _sampling_memModuleCount[driverIdx][deviceIdx]=0; res = ZES_DEVICE_ENUM_MEMORY_MODULES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_memModuleCount[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_MEMORY_MODULES_PTR", res); @@ -1033,9 +1028,8 @@ static int initializeHandles() { _sampling_deviceCount = (uint32_t*) calloc(_sampling_driverCount, sizeof(uint32_t)); _sampling_subDeviceCount = (uint32_t**) calloc(_sampling_driverCount, sizeof(uint32_t*)); _sampling_hDevices = (zes_device_handle_t**) calloc(_sampling_driverCount, sizeof(zes_device_handle_t*)); - _sampling_hSubDevices = (zes_device_handle_t***) calloc(_sampling_driverCount, sizeof(zes_device_handle_t**)); + _sampling_hSubDevices = (ze_device_handle_t***) calloc(_sampling_driverCount, sizeof(ze_device_handle_t**)); for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { - _sampling_deviceCount[driverIdx] = 0; res = ZES_DEVICE_GET_PTR(_sampling_hDrivers[driverIdx], &_sampling_deviceCount[driverIdx], NULL); if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount[driverIdx] == 0) { fprintf(stderr, "ERROR: No device found!\n"); @@ -1053,7 +1047,6 @@ static int initializeHandles() { _sampling_subDeviceCount[driverIdx] = (uint32_t*) calloc(_sampling_deviceCount[driverIdx], sizeof(uint32_t)); _sampling_hSubDevices[driverIdx] = (ze_device_handle_t**) calloc(_sampling_deviceCount[driverIdx], sizeof(ze_device_handle_t*)); for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - // zes_device_ext_properties_t deviceProps = {0}; zes_device_properties_t deviceProps = {0}; deviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; deviceProps.pNext = NULL; @@ -1064,30 +1057,29 @@ static int initializeHandles() { do_tracepoint(lttng_ust_ze_sampling, deviceProperties, (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], deviceIdx, &deviceProps ); - _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; - res = ZES_DEVICE_GET_SUB_DEVICES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], NULL); + res = ZE_DEVICE_GET_SUB_DEVICES_PTR((ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], NULL); if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_DEVICE_GET_SUB_DEVICES_PTR", res); + _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; } if (_sampling_subDeviceCount[driverIdx][deviceIdx] > 0) { _sampling_hSubDevices[driverIdx][deviceIdx] = (ze_device_handle_t*) calloc(_sampling_subDeviceCount[driverIdx][deviceIdx], sizeof(ze_device_handle_t)); - res = ZES_DEVICE_GET_SUB_DEVICES_PTR(_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], _sampling_hSubDevices[driverIdx][deviceIdx]); + res = ZE_DEVICE_GET_SUB_DEVICES_PTR((ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], &_sampling_subDeviceCount[driverIdx][deviceIdx], _sampling_hSubDevices[driverIdx][deviceIdx]); if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("ZES_DEVICE_GET_SUB_DEVICES_PTR", res); + _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); free(_sampling_hSubDevices[driverIdx][deviceIdx]); _sampling_hSubDevices[driverIdx][deviceIdx] = NULL; _sampling_subDeviceCount[driverIdx][deviceIdx] = 0; } for (uint32_t subDeviceIdx = 0; subDeviceIdx < _sampling_subDeviceCount[driverIdx][deviceIdx]; subDeviceIdx++) { - zes_device_properties_t subDeviceProps = {0}; - subDeviceProps.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; + ze_device_properties_t subDeviceProps = {0}; + subDeviceProps.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; subDeviceProps.pNext = NULL; - res = ZES_DEVICE_GET_PROPERTIES_PTR(_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); + res = ZE_DEVICE_GET_PROPERTIES_PTR(_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_DEVICE_GET_PROPERTIES_PTR", res); } - do_tracepoint(lttng_ust_ze_sampling, subDeviceProperties, (zes_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], (ze_device_handle_t)_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); + do_tracepoint(lttng_ust_ze_sampling, subDeviceProperties, (ze_device_handle_t)_sampling_hDevices[driverIdx][deviceIdx], (ze_device_handle_t)_sampling_hSubDevices[driverIdx][deviceIdx][subDeviceIdx], &subDeviceProps); } } } @@ -1118,14 +1110,15 @@ static void readFabricPorts_dump(uint32_t driverIdx, uint32_t deviceIdx) { if (!_sampling_fabricPorts_initialized) return; ze_result_t result; for (uint32_t portIdx = 0; portIdx < _sampling_fabricPortCount[driverIdx][deviceIdx]; portIdx++ ) { - zes_fabric_port_throughput_t throughput = {0}; zes_fabric_port_state_t portState = {0}; + portState.pNext = NULL; portState.stype = ZES_STRUCTURE_TYPE_FABRIC_PORT_STATE; result = ZES_FABRIC_PORT_GET_STATE_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &portState); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_STATE_PTR", result); continue; } + zes_fabric_port_throughput_t throughput = {0}; result = ZES_FABRIC_PORT_GET_THROUGHPUT_PTR(_sampling_hFabricPort[driverIdx][deviceIdx][portIdx], &throughput); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_FABRIC_PORT_GET_THROUGHPUT_PTR", result); @@ -1189,9 +1182,6 @@ static void readEngines_dump(uint32_t driverIdx, uint32_t deviceIdx){ static void thapi_sampling_energy() { for (uint32_t driverIdx = 0; driverIdx < _sampling_driverCount; driverIdx++) { for (uint32_t deviceIdx = 0; deviceIdx < _sampling_deviceCount[driverIdx]; deviceIdx++) { - if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { - readFabricPorts_dump(driverIdx, deviceIdx); - } if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)){ readFrequency_dump(driverIdx, deviceIdx); } @@ -1201,6 +1191,9 @@ static void thapi_sampling_energy() { if (tracepoint_enabled(lttng_ust_ze_sampling, engineStats)){ readEngines_dump(driverIdx, deviceIdx); } + if (tracepoint_enabled(lttng_ust_ze_sampling, fabricPort)) { + readFabricPorts_dump(driverIdx, deviceIdx); + } if (tracepoint_enabled(lttng_ust_ze_sampling, memStats)){ readMemModules_dump(driverIdx, deviceIdx); } diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index a229397f..e69eb4d9 100644 --- a/ze/ze_events.yaml +++ b/ze/ze_events.yaml @@ -14,7 +14,7 @@ lttng_ust_ze_sampling: args: - [ zes_device_handle_t, hDevice ] - [ ze_device_handle_t, hSubDevice ] - - [ zes_device_properties_t *, pSubDeviceProperties ] + - [ ze_device_properties_t *, pSubDeviceProperties ] fields: - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] - [ ctf_integer_hex, uintptr_t, hSubDevice, "(uintptr_t)hSubDevice" ]