Skip to content

Commit

Permalink
rebased
Browse files Browse the repository at this point in the history
  • Loading branch information
Solomon Bekele authored and Solomon Bekele committed Oct 2, 2024
1 parent e57f503 commit 44e66f7
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 263 deletions.
59 changes: 2 additions & 57 deletions xprof/btx_timeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ struct Details {

using timeline_dispatch_t = struct timeline_dispatch_s;
using uuid_getter_t = perfetto_uuid_t (*)(timeline_dispatch_t *, const std::string &, uint64_t, uint64_t, uint32_t, uint64_t,
uint32_t, std::optional<FabricDetails>);
uint32_t, std::optional<Details>);

static perfetto_uuid_t gen_perfetto_uuid() {
// Start at one; it looks like UUID 0 is special
static std::atomic<perfetto_uuid_t> uuid{1};
Expand Down Expand Up @@ -193,62 +194,6 @@ static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::strin
perfetto_uuid_t track_uuid;
track_uuid = uuid_getter(dispatch, hostname, process_id, did, deviceIdx, tHandle, subDevice, options);

auto *packet = dispatch->trace.add_packet();
packet->set_timestamp(0);
packet->set_trusted_packet_sequence_id(TRUSTED_PACKED_SEQUENCE_ID);
auto *track_descriptor = packet->mutable_track_descriptor();
track_descriptor->set_uuid(hp_dev_uuid);
track_descriptor->set_parent_uuid(hp_uuid);
std::ostringstream oss;
if (details) {
oss << track_name << " | SD " << domain;
oss << " | " << details->fabricId << "<->"<< details->remotePortId << " | " <<(details->RxTx ? " TX" : " RX");
} else {
oss << track_name << " | Domain " << domain;
}
track_descriptor->set_name(oss.str());
auto *counter_descriptor = track_descriptor->mutable_counter();
counter_descriptor->set_unit_multiplier(unit_multiplier);
return hp_dev_uuid;
}

/// Resolve the UUID of the "CopyEngine (%)" counter track by delegating to
/// get_counter_track_uuuid with the dispatch's hp_dsdev2cpytracks map.
///
/// @param dispatch    Timeline dispatch state that owns the track map.
/// @param hostname    Forwarded unchanged to get_counter_track_uuuid.
/// @param process_id  Forwarded unchanged to get_counter_track_uuuid.
/// @param did         Forwarded unchanged to get_counter_track_uuuid.
/// @param subDevice   Forwarded unchanged to get_counter_track_uuuid.
/// @param options     Not used by this getter (presumably kept so every getter shares one signature).
/// @return Whatever get_counter_track_uuuid returns for this track.
static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
                                              uintptr_t did, uint32_t subDevice, std::optional<FabricDetails> options) {
  const char *const track_label = "CopyEngine (%)";
  return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, track_label, hostname, process_id, did,
                                 subDevice, 100);
}

/// Resolve the UUID of the "FabricT" counter track by delegating to
/// get_counter_track_uuuid, forwarding the fabric details in `options` and a
/// pointer to the dispatch's hp_dfsdev2fptracks map.
///
/// NOTE(review): the primary map passed here is hp_ddomain2pwrtracks, the same
/// one the power getter uses — confirm this is intentional.
///
/// @param dispatch    Timeline dispatch state that owns the track maps.
/// @param hostname    Forwarded unchanged to get_counter_track_uuuid.
/// @param process_id  Forwarded unchanged to get_counter_track_uuuid.
/// @param did         Forwarded unchanged to get_counter_track_uuuid.
/// @param subDevice   Forwarded unchanged to get_counter_track_uuuid.
/// @param options     Optional fabric details, forwarded unchanged.
/// @return Whatever get_counter_track_uuuid returns for this track.
static perfetto_uuid_t get_fpThroughput_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
                                                    uintptr_t did, uint32_t subDevice, std::optional<FabricDetails> options) {
  const char *const track_label = "FabricT";
  auto *const fabric_tracks = &dispatch->hp_dfsdev2fptracks;
  return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, track_label, hostname, process_id, did,
                                 subDevice, 100, options, fabric_tracks);
}

/// Resolve the UUID of the " Power" counter track by delegating to
/// get_counter_track_uuuid with the dispatch's hp_ddomain2pwrtracks map.
///
/// @param dispatch    Timeline dispatch state that owns the track map.
/// @param hostname    Forwarded unchanged to get_counter_track_uuuid.
/// @param process_id  Forwarded unchanged to get_counter_track_uuuid.
/// @param did         Forwarded unchanged to get_counter_track_uuuid.
/// @param subDevice   Forwarded unchanged to get_counter_track_uuuid.
/// @param options     Not used by this getter (presumably kept so every getter shares one signature).
/// @return Whatever get_counter_track_uuuid returns for this track.
static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
                                             uintptr_t did, uint32_t subDevice, std::optional<FabricDetails> options) {
  // The leading space in the label is deliberate: it maintains the track sequence in the timeline.
  const char *const track_label = " Power";
  return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, track_label, hostname, process_id, did,
                                 subDevice, 100);
}

/// Resolve the UUID of the " Frequency" counter track by delegating to
/// get_counter_track_uuuid with the dispatch's hp_ddomain2frqtracks map.
///
/// @param dispatch    Timeline dispatch state that owns the track map.
/// @param hostname    Forwarded unchanged to get_counter_track_uuuid.
/// @param process_id  Forwarded unchanged to get_counter_track_uuuid.
/// @param did         Forwarded unchanged to get_counter_track_uuuid.
/// @param subDevice   Forwarded unchanged to get_counter_track_uuuid.
/// @param options     Not used by this getter (presumably kept so every getter shares one signature).
/// @return Whatever get_counter_track_uuuid returns for this track.
static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
                                                 uintptr_t did, uint32_t subDevice, std::optional<FabricDetails> options) {
  // The label was misspelled "Ferquency"; the leading space is kept as-is
  // (presumably for the same track-ordering reason as the " Power" label).
  return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, " Frequency", hostname, process_id, did, subDevice, 100);
}

/// Resolve the UUID of the "ComputeEngine (%)" counter track by delegating to
/// get_counter_track_uuuid with the dispatch's hp_dsdev2cpetracks map.
///
/// @param dispatch    Timeline dispatch state that owns the track map.
/// @param hostname    Forwarded unchanged to get_counter_track_uuuid.
/// @param process_id  Forwarded unchanged to get_counter_track_uuuid.
/// @param did         Forwarded unchanged to get_counter_track_uuuid.
/// @param subDevice   Forwarded unchanged to get_counter_track_uuuid.
/// @param options     Not used by this getter (presumably kept so every getter shares one signature).
/// @return Whatever get_counter_track_uuuid returns for this track.
static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
                                                 uintptr_t did, uint32_t subDevice, std::optional<FabricDetails> options ) {
  const char *const track_label = "ComputeEngine (%)";
  return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpetracks, track_label, hostname, process_id, did,
                                 subDevice, 100);
}

static void add_event_DTelemetry(timeline_dispatch_t *dispatch, const std::string &hostname, uint64_t process_id,
uint64_t thread_id, uintptr_t did, uint32_t subDevice,
uint64_t timestamp, float value, uuid_getter_t uuid_getter, const std::string &eventName,
std::optional<FabricDetails> options = std::nullopt) {
perfetto_uuid_t track_uuid;
if (options.has_value()) {
track_uuid = uuid_getter(dispatch, hostname, process_id, did, subDevice, options);
} else {
track_uuid = uuid_getter(dispatch, hostname, process_id, did, subDevice, std::nullopt);
}

auto *packet = dispatch->trace.add_packet();
packet->set_trusted_packet_sequence_id(TRUSTED_PACKED_SEQUENCE_ID);
packet->set_timestamp(timestamp);
Expand Down
192 changes: 9 additions & 183 deletions ze/btx_zeinterval_callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *us
double rxThroughput = static_cast<double>(pFabricPortThroughput_val->rxCounter - prev_throughput.rxCounter) / time_diff;
double txThroughput = static_cast<double>(pFabricPortThroughput_val->txCounter - prev_throughput.txCounter) / time_diff;
DeviceHash uuid_idx = get_device_hash(usr_data, hostname, vpid, hDevice);
if (rxThroughput != 0)
if (rxThroughput != 0 || txThroughput != 0)
btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE,
uuid_idx.hash, uuid_idx.deviceIdx, (uint64_t)hFabricPort, subDevice,
fabricId, remotePortId, rxThroughput, txThroughput,
Expand Down Expand Up @@ -964,208 +964,34 @@ static void lttng_ust_ze_sampling_deviceProperties_callback(void *btx_handle, vo
auto *data = static_cast<data_t *>(usr_data);
data->sampling_device_property[{hostname, vpid, hDevice}] = {*pDeviceProperties_val, deviceIdx};
}
/*

static void lttng_ust_ze_sampling_subDeviceProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, ze_device_handle_t hSubDevice, size_t _pSubDeviceProperties_val_length,
ze_device_properties_t *pSubDeviceProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->sampling_sub_device_property[{hostname, vpid, (ze_device_handle_t)hSubDevice}] = *pSubDeviceProperties_val;
}
*/

static void lttng_ust_ze_sampling_fabricPortProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort,
ze_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort,
size_t _pFabricPortProperties_val_length,
zes_fabric_port_properties_t *pFabricPortProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->fabricPort_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val;
data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val;
}

static void lttng_ust_ze_sampling_memoryProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, zes_mem_handle_t hMemModule,
ze_device_handle_t hDevice, zes_mem_handle_t hMemModule,
size_t _pMemModuleProperties_val_length,
zes_mem_properties_t *pMemModuleProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->memModule_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_mem_handle_t)hMemModule}] = *pMemModuleProperties_val;
}


static void lttng_ust_ze_sampling_powerProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, zes_pwr_handle_t hPower,
size_t _pPowerProperties_val_length,
zes_power_properties_t *pPowerProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->power_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_pwr_handle_t)hPower}] = *pPowerProperties_val;
}

static void lttng_ust_ze_sampling_freqProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, zes_freq_handle_t hFrequency,
size_t _pfreqProperties_val_length,
zes_freq_properties_t *pFreqProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->frequency_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_freq_handle_t)hFrequency}] = *pFreqProperties_val;
}

static void lttng_ust_ze_sampling_engineProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
zes_device_handle_t hDevice, zes_engine_handle_t hEngine,
size_t _pEngineProperties_val_length,
zes_engine_properties_t *pEngineProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->engine_property[{hostname, vpid, (zes_device_handle_t)hDevice, (zes_engine_handle_t)hEngine}] = *pEngineProperties_val;
}

/// Turn two consecutive fabric-port throughput samples into one fabricPort
/// message.
///
/// The port's properties must already be present in data_t::fabricPort_property
/// (keyed by {hostname, vpid, hDevice, hFabricPort}); otherwise an error is
/// written to std::cerr and nothing is emitted. The first sample for a port is
/// only stored as a reference. Each later sample whose counter timestamp
/// differs from the stored one yields RX/TX throughput computed as
/// (counter delta) / (timestamp delta), is pushed with the previous sample's
/// trace timestamp, and then replaces the stored reference.
///
/// @param btx_handle                 Handle passed through to btx_push_message_lttng_fabricPort.
/// @param usr_data                   Opaque pointer to the plugin's data_t state.
/// @param ts                         Trace timestamp of this sample.
/// @param pFabricPortState_val       Current port state (remote port id, RX/TX speed).
/// @param pFabricPortThroughput_val  Current throughput counters and their timestamp.
static void lttng_ust_ze_sampling_fabricPort_callback(void *btx_handle, void *usr_data, int64_t ts,
                                                      const char *hostname, int64_t vpid,
                                                      uint64_t vtid, ze_device_handle_t hDevice,
                                                      zes_fabric_port_handle_t hFabricPort,
                                                      size_t _pFabricPortState_val_length,
                                                      zes_fabric_port_state_t *pFabricPortState_val,
                                                      size_t _pFabricPortThroughput_val_length,
                                                      zes_fabric_port_throughput_t *pFabricPortThroughput_val) {
  auto *state = static_cast<data_t *>(usr_data);

  const auto props_it = state->fabricPort_property.find({hostname, vpid, hDevice, hFabricPort});
  if (props_it == state->fabricPort_property.cend()) {
    std::cerr << "Fabricport property not found!" << std::endl;
    return;
  }

  // Fabric port properties: sub-device id and fabric id; the remote side comes from the state.
  const auto &port_props = props_it->second;
  auto subDevice = port_props.subdeviceId;
  auto fabricId = port_props.portId.fabricId;
  auto remotePortId = pFabricPortState_val->remotePortId.fabricId;

  // Current speed: bitRate * width divided by 8 (presumably bits -> bytes; confirm units).
  const auto speed_of = [](const auto &speed) { return static_cast<double>(speed.bitRate * speed.width) / 8.0; };
  const double rxSpeed = speed_of(pFabricPortState_val->rxSpeed);
  const double txSpeed = speed_of(pFabricPortState_val->txSpeed);

  // Remember the current counters; a fresh insertion means there is no previous sample to diff against.
  auto [ref_it, is_first_sample] = state->device_fabricPort_ref.insert(
      {{hostname, vpid, hDevice, hFabricPort, subDevice}, {*pFabricPortThroughput_val, ts}});
  if (is_first_sample)
    return;

  auto &[last_counters, last_ts] = ref_it->second;

  /* Per doc: When taking the delta, the difference between timestamp samples
   * could be 0, if the frequency of sampling the snapshots is higher than the
   * frequency of the timestamp update. */
  if (pFabricPortThroughput_val->timestamp == last_counters.timestamp)
    return;

  const double elapsed = static_cast<double>(pFabricPortThroughput_val->timestamp - last_counters.timestamp);
  const double rxThroughput = static_cast<double>(pFabricPortThroughput_val->rxCounter - last_counters.rxCounter) / elapsed;
  const double txThroughput = static_cast<double>(pFabricPortThroughput_val->txCounter - last_counters.txCounter) / elapsed;

  btx_push_message_lttng_fabricPort(btx_handle, hostname, 0, 0, last_ts, BACKEND_ZE,
                                    (uint64_t)hDevice, (uint64_t)hFabricPort, subDevice,
                                    fabricId, remotePortId, rxThroughput, txThroughput,
                                    rxSpeed, txSpeed);

  // The current sample becomes the reference for the next delta.
  ref_it->second = {*pFabricPortThroughput_val, ts};
}

/// Turn two consecutive engine-stats samples into one computeEU or copyEU
/// message.
///
/// The engine's properties must already be present in data_t::engine_property
/// (keyed by {hostname, vpid, hDevice, hEngine}); otherwise an error is written
/// to std::cerr. Only engines of type ZES_ENGINE_GROUP_COMPUTE_ALL or
/// ZES_ENGINE_GROUP_COPY_ALL are processed. The first sample is stored as a
/// reference; each later sample with a different counter timestamp yields
/// (activeTime delta) / (timestamp delta), pushed with the previous sample's
/// trace timestamp, after which the reference is replaced.
///
/// @param btx_handle        Handle passed through to the btx_push_message_* call.
/// @param usr_data          Opaque pointer to the plugin's data_t state.
/// @param ts                Trace timestamp of this sample.
/// @param pEngineStats_val  Current engine counters (activeTime, timestamp).
static void lttng_ust_ze_sampling_engineStats_callback(void *btx_handle, void *usr_data, int64_t ts,
                                                       const char *hostname, int64_t vpid,
                                                       uint64_t vtid, ze_device_handle_t hDevice,
                                                       zes_engine_handle_t hEngine,
                                                       size_t _pEngineStats_val_length,
                                                       zes_engine_stats_t *pEngineStats_val) {
  auto *state = static_cast<data_t *>(usr_data);

  const auto props_it = state->engine_property.find({hostname, vpid, hDevice, hEngine});
  if (props_it == state->engine_property.cend()) {
    std::cerr << "Engine property not found for device: " << hDevice << std::endl;
    return;
  }

  const auto &engineProps = props_it->second;
  // Engines not attached to a sub-device are reported under sub-device 0.
  uint32_t subDevice = 0;
  if (engineProps.onSubdevice)
    subDevice = engineProps.subdeviceId;

  const bool is_compute = (engineProps.type == ZES_ENGINE_GROUP_COMPUTE_ALL);
  const bool is_copy = (engineProps.type == ZES_ENGINE_GROUP_COPY_ALL);
  if (!is_compute && !is_copy)
    return;

  // A fresh insertion means there is no previous sample to diff against.
  auto [ref_it, is_first_sample] = state->device_engines_ref.insert(
      {{hostname, vpid, hDevice, hEngine, subDevice}, {*pEngineStats_val, ts}});
  if (is_first_sample)
    return;

  auto &[last_stats, last_ts] = ref_it->second;
  // Identical counter timestamps would make the delta below a division by zero.
  if (pEngineStats_val->timestamp == last_stats.timestamp)
    return;

  const double elapsed = static_cast<double>(pEngineStats_val->timestamp - last_stats.timestamp);
  const double activeTime = static_cast<double>(pEngineStats_val->activeTime - last_stats.activeTime) / elapsed;

  if (is_compute) {
    btx_push_message_lttng_computeEU(btx_handle, hostname, 0, 0, last_ts, BACKEND_ZE,
                                     (uint64_t)hDevice, subDevice, activeTime);
  } else {
    btx_push_message_lttng_copyEU(btx_handle, hostname, 0, 0, last_ts, BACKEND_ZE,
                                  (uint64_t)hDevice, subDevice, activeTime);
  }

  // The current sample becomes the reference for the next delta.
  ref_it->second = {*pEngineStats_val, ts};
}

/// Turn two consecutive energy-counter samples into one power message.
///
/// The power handle's properties must already be present in
/// data_t::power_property (keyed by {hostname, vpid, hDevice, hPower});
/// otherwise an error is written to std::cerr. The domain is subdeviceId + 1
/// when the handle is on a sub-device and 0 otherwise. The first sample is
/// stored as a reference; each later sample with a different counter timestamp
/// yields (energy delta) / (timestamp delta), pushed with the previous sample's
/// trace timestamp, after which the reference is replaced.
///
/// @param btx_handle          Handle passed through to btx_push_message_lttng_power.
/// @param usr_data            Opaque pointer to the plugin's data_t state.
/// @param ts                  Trace timestamp of this sample.
/// @param pEnergyCounter_val  Current energy counter (energy, timestamp).
static void lttng_ust_ze_sampling_gpu_energy_callback(void *btx_handle, void *usr_data, int64_t ts,
                                                      const char *hostname, int64_t vpid,
                                                      uint64_t vtid, ze_device_handle_t hDevice,
                                                      zes_pwr_handle_t hPower,
                                                      size_t _pEnergyCounter_val_length,
                                                      zes_power_energy_counter_t *pEnergyCounter_val) {
  auto *data = static_cast<data_t *>(usr_data);
  const auto it0 = data->power_property.find({hostname, vpid, hDevice, hPower});
  if (it0 != data->power_property.cend()) {
    const auto &powerProps = it0->second;
    uint32_t domain = (powerProps.onSubdevice) ? powerProps.subdeviceId + 1 : 0;
    // A fresh insertion means there is no previous sample to diff against.
    auto [it, inserted] = data->device_energy_ref.insert(
        {{hostname, vpid, hDevice, hPower, domain}, {*pEnergyCounter_val, ts}});
    if (inserted)
      return;

    auto &[prev_EnergyCounter, prev_ts] = it->second;
    // Identical counter timestamps would make the delta below a division by zero.
    if (pEnergyCounter_val->timestamp == prev_EnergyCounter.timestamp)
      return;

    double time_diff = static_cast<double>(pEnergyCounter_val->timestamp - prev_EnergyCounter.timestamp);
    double power = static_cast<double>(pEnergyCounter_val->energy - prev_EnergyCounter.energy) / time_diff;
    btx_push_message_lttng_power(btx_handle, hostname, 0, 0, prev_ts, BACKEND_ZE, (uint64_t)hDevice,
                                 (thapi_domain_idx)domain, power);
    // The current sample becomes the reference for the next delta.
    it->second = {*pEnergyCounter_val, ts};
  } else {
    std::cerr << "Power property not found for device: " << hDevice << std::endl;
  }
  // The stray memModule_property store that followed here was removed: it used
  // hMemModule and pMemModuleProperties_val, which are not declared in this function.
}


/// Emit one frequency message for a frequency-state sample.
///
/// The frequency handle's properties must already be present in
/// data_t::frequency_property (keyed by {hostname, vpid, hDevice, hFrequency});
/// otherwise an error is written to std::cerr and nothing is emitted. The
/// domain is the handle's subdeviceId when it is on a sub-device and 0
/// otherwise. The value pushed is pFreqState_val->actual at trace timestamp ts.
///
/// @param btx_handle      Handle passed through to btx_push_message_lttng_frequency.
/// @param usr_data        Opaque pointer to the plugin's data_t state.
/// @param ts              Trace timestamp of this sample.
/// @param pFreqState_val  Current frequency state.
static void lttng_ust_ze_sampling_gpu_frequency_callback(void *btx_handle, void *usr_data, int64_t ts,
                                                         const char *hostname, int64_t vpid,
                                                         uint64_t vtid, ze_device_handle_t hDevice,
                                                         zes_freq_handle_t hFrequency,
                                                         size_t _pFreqState_val_length,
                                                         zes_freq_state_t *pFreqState_val) {
  auto *state = static_cast<data_t *>(usr_data);

  const auto props_it = state->frequency_property.find({hostname, vpid, hDevice, hFrequency});
  if (props_it == state->frequency_property.cend()) {
    std::cerr << "Frequency property not found for device: " << hDevice << std::endl;
    return;
  }

  const auto &freqProps = props_it->second;
  // Handles not attached to a sub-device are reported under domain 0.
  uint32_t domain = 0;
  if (freqProps.onSubdevice)
    domain = freqProps.subdeviceId;

  btx_push_message_lttng_frequency(btx_handle, hostname, 0, 0, ts, BACKEND_ZE, (uint64_t)hDevice,
                                   (thapi_domain_idx)domain, pFreqState_val->actual);
}
// Properties
static void lttng_ust_ze_sampling_fabricPortProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
ze_device_handle_t hDevice, zes_fabric_port_handle_t hFabricPort,
size_t _pFabricPortProperties_val_length,
zes_fabric_port_properties_t *pFabricPortProperties_val) {
auto *data = static_cast<data_t *>(usr_data);
data->fabricPort_property[{hostname, vpid, (ze_device_handle_t)hDevice, (zes_fabric_port_handle_t)hFabricPort}] = *pFabricPortProperties_val;
}

static void lttng_ust_ze_sampling_powerProperties_callback(void *btx_handle, void *usr_data, int64_t ts,
const char *hostname, int64_t vpid, uint64_t vtid,
ze_device_handle_t hDevice, zes_pwr_handle_t hPower,
Expand Down Expand Up @@ -1295,8 +1121,8 @@ void btx_register_usr_callbacks(void *btx_handle) {
//Properties
btx_register_callbacks_lttng_ust_ze_sampling_deviceProperties(
btx_handle, &lttng_ust_ze_sampling_deviceProperties_callback);
// btx_register_callbacks_lttng_ust_ze_sampling_subDeviceProperties(
// btx_handle, &lttng_ust_ze_sampling_subDeviceProperties_callback);
btx_register_callbacks_lttng_ust_ze_sampling_subDeviceProperties(
btx_handle, &lttng_ust_ze_sampling_subDeviceProperties_callback);
btx_register_callbacks_lttng_ust_ze_sampling_fabricPortProperties(
btx_handle, &lttng_ust_ze_sampling_fabricPortProperties_callback);
btx_register_callbacks_lttng_ust_ze_sampling_powerProperties(
Expand Down
Loading

0 comments on commit 44e66f7

Please sign in to comment.