Skip to content

Commit

Permalink
Support for utilization metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
solo2abera committed Sep 22, 2023
1 parent e604c70 commit 4045398
Show file tree
Hide file tree
Showing 11 changed files with 404 additions and 10 deletions.
97 changes: 97 additions & 0 deletions utils/xprof_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,103 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr
return message;
}

/*
 * Shared builder for the engine-utilization event messages.
 *
 * The compute-engine and copy-engine messages have exactly the same layout;
 * only the event class supplied by the caller differs, so both public
 * creators below delegate here.
 *
 * Common context fields, by index:
 *   0 hostname (string), 1 process id, 2 thread id, 3 ts, 4 backend
 *   (1-4 are written through the signed-integer setter).
 * Payload fields, by index:
 *   0 hDevice, 1 subDevice, 2 activeTime (all written through the
 *   unsigned-integer setter).
 *
 * Returns the message created by bt_message_event_create(). The result is not
 * checked for NULL before the fields are borrowed.
 */
static bt_message* create_engine_utilization_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
                                                     const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts,
                                                     bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) {

  /* Message creation */
  bt_message *message = bt_message_event_create(message_iterator, event_class, stream);

  /* event */
  bt_event *downstream_event = bt_message_event_borrow_event(message);

  /* Common context */
  bt_field *context_field = bt_event_borrow_common_context_field(downstream_event);

  // Hostname
  bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field, 0);
  bt_field_string_set_value(hostname_msg_field, hostname);
  // pid
  bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field, 1);
  bt_field_integer_signed_set_value(vpid_field, process_id);
  // vid
  bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field, 2);
  bt_field_integer_signed_set_value(vtid_field, thread_id);
  // ts (uint64_t here, stored through the signed setter)
  bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field, 3);
  bt_field_integer_signed_set_value(ts_field, ts);
  // backend
  bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field, 4);
  bt_field_integer_signed_set_value(backend_field, backend);

  /* Payload */
  bt_field *payload_field = bt_event_borrow_payload_field(downstream_event);

  // did
  bt_field *device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field, 0);
  bt_field_integer_unsigned_set_value(device_id_field, hDevice);

  // subDevice
  bt_field *subDevice_field = bt_field_structure_borrow_member_field_by_index(payload_field, 1);
  bt_field_integer_unsigned_set_value(subDevice_field, subDevice);

  // activeTime
  bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field, 2);
  bt_field_integer_unsigned_set_value(activeTime_field, activeTime);

  return message;
}

/*
 * Creates a compute-engine utilization event message.
 *
 * event_class selects the event being emitted; the remaining arguments are
 * copied into the common context (hostname, process_id, thread_id, ts,
 * backend) and the payload (hDevice, subDevice, activeTime).
 */
bt_message* create_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
                                     const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts,
                                     bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) {
  return create_engine_utilization_message(hostname, process_id, thread_id, hDevice, subDevice, activeTime, ts,
                                           event_class, message_iterator, stream, backend);
}

/*
 * Creates a copy-engine utilization event message.
 *
 * Same field layout as create_computeEU_message(); only the event_class
 * passed by the caller differs.
 */
bt_message* create_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
                                  const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts,
                                  bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) {
  return create_engine_utilization_message(hostname, process_id, thread_id, hDevice, subDevice, activeTime, ts,
                                           event_class, message_iterator, stream, backend);
}


bt_message* create_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name,
const uint64_t ts, const uint64_t duration, const bool err,
bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) {
Expand Down
12 changes: 10 additions & 2 deletions utils/xprof_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ typedef std::string hostname_t;
typedef std::string thapi_function_name;
typedef uintptr_t thapi_device_id;
typedef uint32_t thapi_domain_id;

typedef uint32_t thapi_sdevice_id;
// Represent a device and a sub device
typedef std::tuple<thapi_device_id, thapi_device_id> dsd_t;
typedef std::tuple<hostname_t, process_id_t> hp_t;
Expand All @@ -60,6 +60,7 @@ typedef std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi
typedef std::tuple<hostname_t, process_id_t, thapi_device_id> hp_device_t;
typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_device_id> hp_dsd_t;
typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_domain_id> hp_ddomain_t;
typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_sdevice_id> hp_dsdev_t;
typedef std::tuple<long, long> sd_t;
typedef std::tuple<thread_id_t, thapi_function_name, long> tfn_ts_t;
typedef std::tuple<thapi_function_name, long> fn_ts_t;
Expand Down Expand Up @@ -116,11 +117,18 @@ bt_message* create_power_message(const char* hostname, const process_id_t propro
const uintptr_t hDevice, const uint32_t domain, const uint64_t power, const uint64_t ts,
bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN);


// Declaration of the frequency event message creator; the definition lives in
// utils/xprof_utils.cpp.
// NOTE(review): `ts` comes BEFORE `frequency` here, whereas the sibling
// creators take the timestamp after the measured value — keep call sites in
// this order.
bt_message* create_frequency_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency,
bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN);

// Creates a compute-engine utilization event message.
// hostname, process_id, thread_id, ts and backend are written to the event's
// common context; hDevice, subDevice and activeTime form the payload.
// event_class selects the event to emit; backend defaults to BACKEND_UNKNOWN.
bt_message* create_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts,
bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN);

// Creates a copy-engine utilization event message.
// hostname, process_id, thread_id, ts and backend are written to the event's
// common context; hDevice, subDevice and activeTime form the payload.
// event_class selects the event to emit; backend defaults to BACKEND_UNKNOWN.
bt_message* create_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id,
const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts,
bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN);

bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t,
const char *name, const uint64_t ts, const uint64_t duration,
const bool err, bt_event_class *, bt_self_message_iterator *,
Expand Down
38 changes: 38 additions & 0 deletions xprof/btx_interval_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,41 @@
:type: integer_unsigned
:field_value_range: 64
:cast_type: uint64_t
# lttng:computeEU event. Payload members, in order: did, subDevice, activeTime.
# The cast types (uint64_t, uint32_t, uint64_t) presumably have to stay in sync
# with the parameters of computeEU_usr_callback in btx_timeline.cpp — confirm.
- :name: lttng:computeEU
:payload_field_class:
:type: structure
:members:
- :name: did
:field_class:
:type: integer_unsigned
:field_value_range: 64
:cast_type: uint64_t
- :name: subDevice
:field_class:
:type: integer_unsigned
:field_value_range: 32
:cast_type: uint32_t
- :name: activeTime
:field_class:
:type: integer_unsigned
:field_value_range: 64
:cast_type: uint64_t
# lttng:copyEU event. Payload members, in order: did, subDevice, activeTime.
# The cast types (uint64_t, uint32_t, uint64_t) presumably have to stay in sync
# with the parameters of copyEU_usr_callback in btx_timeline.cpp — confirm.
- :name: lttng:copyEU
:payload_field_class:
:type: structure
:members:
- :name: did
:field_class:
:type: integer_unsigned
:field_value_range: 64
:cast_type: uint64_t
- :name: subDevice
:field_class:
:type: integer_unsigned
:field_value_range: 32
:cast_type: uint32_t
- :name: activeTime
:field_class:
:type: integer_unsigned
:field_value_range: 64
:cast_type: uint64_t
65 changes: 61 additions & 4 deletions xprof/btx_timeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ struct timeline_dispatch_s {
std::unordered_map<hp_device_t, perfetto_uuid_t> hp_device2countertracks;
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2frqtracks;
std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2pwrtracks;

std::unordered_map<hp_dsdev_t, perfetto_uuid_t> hp_dsdev2cpetracks;
std::unordered_map<hp_dsdev_t, perfetto_uuid_t> hp_dsdev2cpytracks;
perfetto_pruned::Trace trace;
};
using timeline_dispatch_t = struct timeline_dispatch_s;
Expand Down Expand Up @@ -99,13 +100,24 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch,
}
// Returns the uuid of the frequency counter track for the given
// (hostname, process_id, did, domain), delegating to get_counter_track_uuuid
// with the hp_ddomain2frqtracks map.
static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname,
uint64_t process_id, thapi_device_id did, thapi_domain_id domain) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, "GPU Frequency", hostname, process_id, did, domain);
// NOTE(review): the statement below is unreachable (it follows an
// unconditional return) and differs only by the leading space in the track
// name. It looks like both the old and the new line of a change were kept —
// confirm which literal is intended and drop the other.
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, " GPU Frequency", hostname, process_id, did, domain);
}
static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname,
uint64_t process_id, thapi_device_id did, thapi_device_id domain) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain);
uint64_t process_id, thapi_device_id did, thapi_domain_id domain) {
return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain);
}

// Returns the uuid of the compute-engine utilization counter track for the
// given (hostname, process_id, did, subDevice). The hp_dsdev2cpetracks map of
// the dispatch is handed to get_counter_track_uuuid together with the track
// name.
static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname,
                                                 uint64_t process_id, thapi_device_id did,
                                                 thapi_sdevice_id subDevice) {
  auto &compute_tracks = dispatch->hp_dsdev2cpetracks;
  return get_counter_track_uuuid(dispatch, compute_tracks, "ComputeE Utilization",
                                 hostname, process_id, did, subDevice);
}

// Returns the uuid of the copy-engine utilization counter track for the given
// (hostname, process_id, did, subDevice). The hp_dsdev2cpytracks map of the
// dispatch is handed to get_counter_track_uuuid together with the track name.
static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname,
                                              uint64_t process_id, thapi_device_id did,
                                              thapi_sdevice_id subDevice) {
  auto &copy_tracks = dispatch->hp_dsdev2cpytracks;
  return get_counter_track_uuuid(dispatch, copy_tracks, "CopyE Utilization",
                                 hostname, process_id, did, subDevice);
}


static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname,
uint64_t process_id, uint64_t thread_id, uintptr_t did,
uint32_t domain, uint64_t timestamp, uint64_t frequency) {
Expand Down Expand Up @@ -134,6 +146,34 @@ static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname,
track_event->set_name("Power");
track_event->set_counter_value(power);
}
// Appends one compute-engine utilization sample to the trace.
//
// The counter track uuid is obtained via get_computeEU_track_uuuid, then a new
// packet is added carrying a TYPE_COUNTER track event whose counter value is
// activeTime. thread_id is accepted but not used.
static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname,
                                uint64_t process_id, uint64_t thread_id, uintptr_t did,
                                uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) {
  const perfetto_uuid_t counter_track =
      get_computeEU_track_uuuid(dispatch, hostname, process_id, did, subDevice);

  auto *sample = dispatch->trace.add_packet();
  sample->set_timestamp(timestamp);
  sample->set_trusted_packet_sequence_id(10000);

  auto *counter = sample->mutable_track_event();
  counter->set_track_uuid(counter_track);
  counter->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER);
  counter->set_counter_value(activeTime);
  counter->set_name("computeEngine Usage");
}

// Appends one copy-engine utilization sample to the trace.
//
// The counter track uuid is obtained via get_copyEU_track_uuuid, then a new
// packet is added carrying a TYPE_COUNTER track event whose counter value is
// activeTime. thread_id is accepted but not used.
static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname,
                             uint64_t process_id, uint64_t thread_id, uintptr_t did,
                             uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) {
  const perfetto_uuid_t counter_track =
      get_copyEU_track_uuuid(dispatch, hostname, process_id, did, subDevice);

  auto *sample = dispatch->trace.add_packet();
  sample->set_timestamp(timestamp);
  sample->set_trusted_packet_sequence_id(10000);

  auto *counter = sample->mutable_track_event();
  counter->set_track_uuid(counter_track);
  counter->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER);
  counter->set_counter_value(activeTime);
  counter->set_name("copyEngine Usage");
}


static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin,
std::string name) {
Expand Down Expand Up @@ -352,11 +392,28 @@ static void power_usr_callback(void *btx_handle, void *usr_data, const char *hos
add_event_power(dispatch, hostname, vpid, vtid, did, domain, ts, power);
}

// Callback for lttng:computeEU events: forwards the sample to
// add_event_computeEU, using usr_data as the timeline dispatch.
// btx_handle and backend are part of the callback signature but unused here.
static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname,
                                   int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend,
                                   uint64_t did, uint32_t subDevice, uint64_t activeTime) {
  add_event_computeEU(static_cast<timeline_dispatch_t *>(usr_data), hostname, vpid, vtid, did,
                      subDevice, ts, activeTime);
}

// Callback for lttng:copyEU events: forwards the sample to add_event_copyEU,
// using usr_data as the timeline dispatch.
// btx_handle and backend are part of the callback signature but unused here.
static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname,
                                int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend,
                                uint64_t did, uint32_t subDevice, uint64_t activeTime) {
  add_event_copyEU(static_cast<timeline_dispatch_t *>(usr_data), hostname, vpid, vtid, did,
                   subDevice, ts, activeTime);
}


// Registers every timeline callback on the given btx handle: one handler per
// lttng event (host, device, frequency, power, computeEU, copyEU) plus the
// hooks that initialize and finalize the user data.
void btx_register_usr_callbacks(void *btx_handle) {
// Per-event handlers.
btx_register_callbacks_lttng_host(btx_handle, &host_usr_callback);
btx_register_callbacks_lttng_device(btx_handle, &device_usr_callback);
btx_register_callbacks_lttng_frequency(btx_handle, &frequency_usr_callback);
btx_register_callbacks_lttng_power(btx_handle, &power_usr_callback);
btx_register_callbacks_lttng_computeEU(btx_handle, &computeEU_usr_callback);
btx_register_callbacks_lttng_copyEU(btx_handle, &copyEU_usr_callback);
// User-data lifecycle hooks.
btx_register_callbacks_initialize_usr_data(btx_handle, &btx_initialize_usr_data);
btx_register_callbacks_finalize_usr_data(btx_handle, &btx_finalize_usr_data);
}
3 changes: 2 additions & 1 deletion xprof/interval.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ bt_component_class_initialize_method_status <%= namespace %>_dispatch_initialize
dispatch->device_name_event_class = create_lttng_device_name_event_class_message(trace_class, stream_class);
dispatch->frequency_event_class = create_lttng_frequency_event_class_message(trace_class, stream_class);
dispatch->power_event_class = create_lttng_power_event_class_message(trace_class, stream_class);

dispatch->computeEU_event_class = create_lttng_computeEU_event_class_message(trace_class, stream_class);
dispatch->copyEU_event_class = create_lttng_copyEU_event_class_message(trace_class, stream_class);
/* Create a default trace from (instance of `trace_class`) */
bt_trace *trace = bt_trace_create(trace_class);

Expand Down
2 changes: 2 additions & 0 deletions xprof/interval.h.erb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ struct <%= namespace %>_dispatch {
bt_event_class *device_name_event_class;
bt_event_class *frequency_event_class;
bt_event_class *power_event_class;
bt_event_class *computeEU_event_class;
bt_event_class *copyEU_event_class;
/* Component's input port (weak) */
bt_self_component_port_input *in_port;
};
Expand Down
24 changes: 24 additions & 0 deletions xprof/interval_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,27 @@
:field_value_range: 32
- :name: power
:class: unsigned
# lttng:computeEU event. Payload members, in order: did (displayed in base 16),
# subDevice (32-bit value range), activeTime (unsigned, no extra properties).
- :name: lttng:computeEU
:payload:
- :name: did
:class: unsigned
:class_properties:
:preferred_display_base: 16
- :name: subDevice
:class: unsigned
:class_properties:
:field_value_range: 32
- :name: activeTime
:class: unsigned
# lttng:copyEU event. Payload members, in order: did (displayed in base 16),
# subDevice (32-bit value range), activeTime (unsigned, no extra properties).
- :name: lttng:copyEU
:payload:
- :name: did
:class: unsigned
:class_properties:
:preferred_display_base: 16
- :name: subDevice
:class: unsigned
:class_properties:
:field_value_range: 32
- :name: activeTime
:class: unsigned
Loading

0 comments on commit 4045398

Please sign in to comment.