// Patch summary (git unified diff). The line breaks inside the hunks are not preserved in
// this copy, so added/removed text has to be read from the inline '+' / '-' markers.
//
// xprof/btx_interval_model.yaml
//   + Adds a `pBandwidth` field (type double, cast_type float) directly before `rdBandwidth`.
//
// xprof/btx_timeline.cpp
//   * get_counter_track_uuuid: the allocation branch now compares track_name against
//     "Allocated Memory (%)" instead of " Memory Allocation (%)". A new " Power" branch names
//     domain 0 " Total Power" and every other domain "<track_name> | SubDevice <domain-1>".
//     The "<track_name> | Domain <domain>" naming stays as the final else.
//   * get_Occupancy_track_uuuid: passes "Allocated Memory (%)" instead of " Memory Allocation $".
//     The removed literal differed from the removed comparison string; the new literal and the
//     new comparison string are identical.
//   * add_event_memModule / memModule_usr_callback: both gain a `float pBandwidth` parameter
//     placed before rdBandwidth; the callback forwards it to add_event_memModule.
//
// ze/btx_zeinterval_callbacks.cpp
//   * fabricPort sampling callback: adds `if (rxThroughput != 0)` immediately before the
//     btx_push_message_lttng_fabricPort call.
//   * memStats sampling callback: adds pBandwidth = (read delta + write delta) * 1e6 /
//     (time_diff * maxBandwidth). rdBandwidth and wtBandwidth are no longer divided by
//     maxBandwidth. pBandwidth is passed to btx_push_message_lttng_memModule before rdBandwidth.
//   * engineStats sampling callback: time_diff and activeTime each gain an explicit branch for
//     the case where the current counter is below the previous one
//     (current + (UINT64_MAX - previous) + 1).
//
// NOTE(review): the new fabricPort guard tests rxThroughput only and has no braces, so a sample
//   with zero rx is not pushed whatever txThroughput is, and the exact `!= 0` comparison is on a
//   double -- confirm that dropping tx-only samples is intended.
// NOTE(review): no zero check on pMemState_val->size, time_diff * maxBandwidth, or the engine
//   time_diff is visible in these hunks; only equal timestamps return early. Confirm that size
//   and maxBandwidth cannot be reported as 0.
// NOTE(review): if the timestamp/activeTime fields are uint64_t (TODO confirm against the
//   Sysman headers), plain unsigned subtraction already wraps modulo 2^64, so the explicit
//   wrap branches would produce the same value as `current - previous`.
// NOTE(review): rdBandwidth/wtBandwidth change from a maxBandwidth-relative value to an
//   un-normalized rate; whether the " | RD BW" / " | WR BW" track consumers expect this is not
//   visible here.
// NOTE(review): whether add_event_memModule actually uses the new pBandwidth argument is not
//   visible in this hunk.
// NOTE(review): `static_cast(`, `static_cast(usr_data)` and `std::optional` appear without
//   template arguments -- presumably stripped when this copy was produced; verify against the
//   original patch before applying.
diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index b41d82b7..88279472 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml @@ -281,6 +281,10 @@ :type: integer_unsigned :field_value_range: 32 :cast_type: uint32_t + - :name: pBandwidth + :field_class: + :type: double + :cast_type: float - :name: rdBandwidth :field_class: :type: double diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 6e7e12a2..31b0f248 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -129,15 +129,21 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, oss << track_name << " Module " << domain; oss << " | " <<(details->RxTx ? "WR BW" : "RD BW"); } - else if (track_name==" Memory Allocation (%)") { + else if (track_name=="Allocated Memory (%)") { oss << track_name << " Module " << domain; } else if (track_name==" CopyEngine (%)" || track_name==" ComputeEngine (%)") { oss << track_name << " | SubDevice " << domain; } - else { - oss << track_name << " | Domain " << domain; + else if (track_name==" Power") { + if(domain==0){ + oss <<" Total Power"; + } + else + oss << track_name << " | SubDevice " << domain-1; } + else + oss << track_name << " | Domain " << domain; track_descriptor->set_name(oss.str()); track_descriptor->mutable_counter(); return hp_dev_uuid; @@ -177,7 +183,7 @@ static perfetto_uuid_t get_Bandwidth_track_uuuid(timeline_dispatch_t *dispatch, static perfetto_uuid_t get_Occupancy_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id, uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice, std::optional
options) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Memory Allocation $", hostname, process_id, did, deviceIdx, hMemModule, subDevice); + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Allocated Memory (%)", hostname, process_id, did, deviceIdx, hMemModule, subDevice); } @@ -198,7 +204,7 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin } static void add_event_memModule( timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice, uint64_t timestamp, float rdBandwidth, float wtBandwidth, float occupancy) { + uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice, uint64_t timestamp, float pBandwidth, float rdBandwidth, float wtBandwidth, float occupancy) { // Define details for RX throughput. Details details = {false, 0, 0}; add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule, subDevice, timestamp, @@ -502,10 +508,11 @@ static void fabricPort_usr_callback(void *btx_handle, void *usr_data, const char static void memModule_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice, - float rdBandwidth, float wtBandwidth, float occupancy) { + uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, + uint32_t subDevice, float pBandwidth, float rdBandwidth, + float wtBandwidth, float occupancy) { auto *dispatch = static_cast(usr_data); - add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts, rdBandwidth, wtBandwidth, occupancy); + add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts, pBandwidth, rdBandwidth, wtBandwidth, occupancy); } diff --git 
a/ze/btx_zeinterval_callbacks.cpp b/ze/btx_zeinterval_callbacks.cpp index 79acf5cf..1ede4a91 100644 --- a/ze/btx_zeinterval_callbacks.cpp +++ b/ze/btx_zeinterval_callbacks.cpp @@ -821,6 +821,7 @@ static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *us double rxThroughput = static_cast(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / time_diff; double txThroughput = static_cast(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / time_diff; DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); + if (rxThroughput != 0) btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, subDevice, fabricId, remotePortId, rxThroughput, txThroughput, @@ -856,16 +857,19 @@ static void lttng_ust_ze_sampling_memStats_callback(void *btx_handle, void *usr_ if (pMemBandwidth_val->timestamp == prev_bandwidth.timestamp) return; + // Calculate the RD and WT bandwidth //https://spec.oneapi.io/level-zero/latest/sysman/api.html#_CPPv419zes_mem_bandwidth_t + double allocation = static_cast(pMemState_val->size - pMemState_val->free) * 100.0 / static_cast(pMemState_val->size); double time_diff = static_cast(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp); - double rdBandwidth = static_cast(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth); - double wtBandwidth = static_cast(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth); + double pBandwidth = static_cast((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) + (pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter)) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth); + double rdBandwidth = static_cast(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / (time_diff); + double wtBandwidth = 
static_cast(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 / (time_diff); DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); btx_push_message_lttng_memModule(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, - uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hMemModule, subDevice, - rdBandwidth, wtBandwidth, allocation); + uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hMemModule, + subDevice, pBandwidth, rdBandwidth, wtBandwidth, allocation); // Update the stored values it->second = {*pMemBandwidth_val, ts}; } else { @@ -893,9 +897,15 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u auto &[prev_engineStats, prev_ts] = it->second; if (pEngineStats_val->timestamp == prev_engineStats.timestamp) return; - - double time_diff = static_cast(pEngineStats_val->timestamp - prev_engineStats.timestamp); - double activeTime = static_cast(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / time_diff; + double time_diff = pEngineStats_val->timestamp >= prev_engineStats.timestamp ? + static_cast(pEngineStats_val->timestamp - prev_engineStats.timestamp) : + static_cast(pEngineStats_val->timestamp + + (UINT64_MAX - prev_engineStats.timestamp) + 1); + double activeTime = 0; + if(pEngineStats_val->activeTime > prev_engineStats.activeTime) + activeTime = static_cast(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / time_diff; + else + activeTime = static_cast((UINT64_MAX - prev_engineStats.activeTime) + pEngineStats_val->activeTime + 1) * 100 / time_diff; DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice); if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) { btx_push_message_lttng_computeEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,