Skip to content

Commit

Permalink
timeline cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
solo2abera committed Sep 17, 2024
1 parent 74d8005 commit 8cf67a0
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 15 deletions.
4 changes: 4 additions & 0 deletions xprof/btx_interval_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@
:type: integer_unsigned
:field_value_range: 32
:cast_type: uint32_t
- :name: pBandwidth
:field_class:
:type: double
:cast_type: float
- :name: rdBandwidth
:field_class:
:type: double
Expand Down
23 changes: 15 additions & 8 deletions xprof/btx_timeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,21 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch,
oss << track_name << " Module " << domain;
oss << " | " <<(details->RxTx ? "WR BW" : "RD BW");
}
else if (track_name==" Memory Allocation (%)") {
else if (track_name=="Allocated Memory (%)") {
oss << track_name << " Module " << domain;
}
else if (track_name==" CopyEngine (%)" || track_name==" ComputeEngine (%)") {
oss << track_name << " | SubDevice " << domain;
}
else {
oss << track_name << " | Domain " << domain;
else if (track_name==" Power") {
if(domain==0){
oss <<" Total Power";
}
else
oss << track_name << " | SubDevice " << domain-1;
}
else
oss << track_name << " | Domain " << domain;
track_descriptor->set_name(oss.str());
track_descriptor->mutable_counter();
return hp_dev_uuid;
Expand Down Expand Up @@ -177,7 +183,7 @@ static perfetto_uuid_t get_Bandwidth_track_uuuid(timeline_dispatch_t *dispatch,

// Returns the result of get_counter_track_uuuid for dispatch->hp_ddomain2telmtracks
// and a fixed track-name literal, forwarding hostname, process_id, did, deviceIdx,
// hMemModule and subDevice unchanged. `options` is accepted but not used in the
// lines shown here.
// NOTE(review): this text comes from a rendered diff, so the two `return` lines
// below look like the before/after sides of a single change (the track name
// " Memory Allocation $" replaced by "Allocated Memory (%)") — confirm against the
// repository. Read literally, only the first `return` would ever execute.
static perfetto_uuid_t get_Occupancy_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice, std::optional<Details> options) {
// Presumably the pre-change line.
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, " Memory Allocation $", hostname, process_id, did, deviceIdx, hMemModule, subDevice);
// Presumably the post-change line.
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2telmtracks, "Allocated Memory (%)", hostname, process_id, did, deviceIdx, hMemModule, subDevice);
}


Expand All @@ -198,7 +204,7 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin
}

static void add_event_memModule( timeline_dispatch_t *dispatch, std::string hostname,
uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice, uint64_t timestamp, float rdBandwidth, float wtBandwidth, float occupancy) {
uint64_t process_id, uint64_t thread_id, uint64_t did, uint32_t deviceIdx, uintptr_t hMemModule, uint32_t subDevice, uint64_t timestamp, float pBandwidth, float rdBandwidth, float wtBandwidth, float occupancy) {
// Define details for RX throughput.
Details details = {false, 0, 0};
add_event_DTelemetry(dispatch, hostname, process_id, thread_id, did, deviceIdx, hMemModule, subDevice, timestamp,
Expand Down Expand Up @@ -502,10 +508,11 @@ static void fabricPort_usr_callback(void *btx_handle, void *usr_data, const char

// Callback for a memModule message: casts usr_data to timeline_dispatch_t* and
// forwards the sample to add_event_memModule, passing vpid, vtid and ts along with
// the device/module identifiers and the measured values. btx_handle and backend
// are not used in the lines shown here.
// NOTE(review): this text comes from a rendered diff, so the parameter list and the
// forwarding call each appear twice — once without pBandwidth and once with it.
// The variant with pBandwidth is presumably the post-change side; confirm against
// the repository. As rendered, the definition is not well-formed C++.
static void memModule_usr_callback(void *btx_handle, void *usr_data, const char *hostname,
int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend,
// Presumably the pre-change tail of the parameter list (no pBandwidth).
uint64_t did, uint32_t deviceIdx, uint64_t hMemModule, uint32_t subDevice,
float rdBandwidth, float wtBandwidth, float occupancy) {
// Presumably the post-change tail of the parameter list (pBandwidth added before rdBandwidth).
uint64_t did, uint32_t deviceIdx, uint64_t hMemModule,
uint32_t subDevice, float pBandwidth, float rdBandwidth,
float wtBandwidth, float occupancy) {
// usr_data carries the dispatch state handed to the callback.
auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data);
// Presumably the pre-change forwarding call.
add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts, rdBandwidth, wtBandwidth, occupancy);
// Presumably the post-change forwarding call, with pBandwidth passed ahead of rdBandwidth.
add_event_memModule(dispatch, hostname, vpid, vtid, did, deviceIdx, hMemModule, subDevice, ts, pBandwidth, rdBandwidth, wtBandwidth, occupancy);
}


Expand Down
24 changes: 17 additions & 7 deletions ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,7 @@ static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *us
double rxThroughput = static_cast<double>(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / time_diff;
double txThroughput = static_cast<double>(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (rxThroughput != 0)
btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, subDevice,
fabricId, remotePortId, rxThroughput, txThroughput,
Expand Down Expand Up @@ -856,16 +857,19 @@ static void lttng_ust_ze_sampling_memStats_callback(void *btx_handle, void *usr_

if (pMemBandwidth_val->timestamp == prev_bandwidth.timestamp)
return;

// Calculate the RD and WT bandwidth
//https://spec.oneapi.io/level-zero/latest/sysman/api.html#_CPPv419zes_mem_bandwidth_t

double allocation = static_cast<double>(pMemState_val->size - pMemState_val->free) * 100.0 / static_cast<double>(pMemState_val->size);
double time_diff = static_cast<double>(pMemBandwidth_val->timestamp - prev_bandwidth.timestamp);
double rdBandwidth = static_cast<double>(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth);
double wtBandwidth = static_cast<double>(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth);
double pBandwidth = static_cast<double>((pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) + (pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter)) * 1e6 / (time_diff * pMemBandwidth_val->maxBandwidth);
double rdBandwidth = static_cast<double>(pMemBandwidth_val->readCounter - prev_bandwidth.readCounter) * 1e6 / (time_diff);
double wtBandwidth = static_cast<double>(pMemBandwidth_val->writeCounter - prev_bandwidth.writeCounter) * 1e6 / (time_diff);
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
btx_push_message_lttng_memModule(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hMemModule, subDevice,
rdBandwidth, wtBandwidth, allocation);
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hMemModule,
subDevice, pBandwidth, rdBandwidth, wtBandwidth, allocation);
// Update the stored values
it->second = {*pMemBandwidth_val, ts};
} else {
Expand Down Expand Up @@ -893,9 +897,15 @@ static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *u
auto &[prev_engineStats, prev_ts] = it->second;
if (pEngineStats_val->timestamp == prev_engineStats.timestamp)
return;

double time_diff = static_cast<double>(pEngineStats_val->timestamp - prev_engineStats.timestamp);
double activeTime = static_cast<double>(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / time_diff;
double time_diff = pEngineStats_val->timestamp >= prev_engineStats.timestamp ?
static_cast<double>(pEngineStats_val->timestamp - prev_engineStats.timestamp) :
static_cast<double>(pEngineStats_val->timestamp +
(UINT64_MAX - prev_engineStats.timestamp) + 1);
double activeTime = 0;
if(pEngineStats_val->activeTime > prev_engineStats.activeTime)
activeTime = static_cast<double>(pEngineStats_val->activeTime - prev_engineStats.activeTime) * 100 / time_diff;
else
activeTime = static_cast<double>((UINT64_MAX - prev_engineStats.activeTime) + pEngineStats_val->activeTime + 1) * 100 / time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL) {
btx_push_message_lttng_computeEU(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,
Expand Down

0 comments on commit 8cf67a0

Please sign in to comment.