From 404539874065398dacf7fb13b88d3f98bd2f000e Mon Sep 17 00:00:00 2001 From: sbekele Date: Fri, 22 Sep 2023 04:03:48 +0000 Subject: [PATCH 01/10] Support for utilization metrics --- utils/xprof_utils.cpp | 97 +++++++++++++++++++++++++++++++++ utils/xprof_utils.hpp | 12 +++- xprof/btx_interval_model.yaml | 38 +++++++++++++ xprof/btx_timeline.cpp | 65 ++++++++++++++++++++-- xprof/interval.c.erb | 3 +- xprof/interval.h.erb | 2 + xprof/interval_model.yaml | 24 ++++++++ ze/tracer_ze_helpers.include.c | 77 ++++++++++++++++++++++++++ ze/ze_events.yaml | 28 +++++++++- ze/zeinterval_callbacks.cpp.erb | 60 ++++++++++++++++++++ ze/zeinterval_callbacks.hpp | 8 +++ 11 files changed, 404 insertions(+), 10 deletions(-) diff --git a/utils/xprof_utils.cpp b/utils/xprof_utils.cpp index 5527e3eb..34e011b1 100644 --- a/utils/xprof_utils.cpp +++ b/utils/xprof_utils.cpp @@ -118,6 +118,103 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr return message; } +bt_message* create_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = 
bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field *device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field,0); + bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + //subDevice + bt_field *subDevice_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(subDevice_field, subDevice); + + //activeTime + bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + + return message; +} + +bt_message* create_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = 
bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field *device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field,0); + bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + // subDevice + bt_field *subDevice_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(subDevice_field, subDevice); + + //activeTime + bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + + return message; +} + + bt_message* create_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index 2695e4d5..cf13da4a 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -47,7 +47,7 @@ typedef std::string hostname_t; typedef std::string thapi_function_name; typedef uintptr_t thapi_device_id; typedef uint32_t thapi_domain_id; - +typedef uint32_t thapi_sdevice_id; // Represent a device and a sub device typedef std::tuple dsd_t; typedef std::tuple hp_t; 
@@ -60,6 +60,7 @@ typedef std::tuple hp_device_t; typedef std::tuple hp_dsd_t; typedef std::tuple hp_ddomain_t; +typedef std::tuple hp_dsdev_t; typedef std::tuple sd_t; typedef std::tuple tfn_ts_t; typedef std::tuple fn_ts_t; @@ -116,11 +117,18 @@ bt_message* create_power_message(const char* hostname, const process_id_t propro const uintptr_t hDevice, const uint32_t domain, const uint64_t power, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); - bt_message* create_frequency_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); +bt_message* create_computeEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + +bt_message* create_copyEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t, const char *name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *, bt_self_message_iterator *, diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index f62aac1a..78785670 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml 
@@ -127,3 +127,41 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: lttng:computeEU + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: subDevice + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: activeTime + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: lttng:copyEU + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: subDevice + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: activeTime + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 0c9f3d0d..4dc15661 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -30,7 +30,8 @@ struct timeline_dispatch_s { std::unordered_map hp_device2countertracks; std::unordered_map hp_ddomain2frqtracks; std::unordered_map hp_ddomain2pwrtracks; - + std::unordered_map hp_dsdev2cpetracks; + std::unordered_map hp_dsdev2cpytracks; perfetto_pruned::Trace trace; }; using timeline_dispatch_t = struct timeline_dispatch_s; @@ -99,13 +100,24 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, } static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, "GPU Frequency", hostname, process_id, did, domain); + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, " GPU Frequency", hostname, process_id, did, domain); } static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t 
*dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did, thapi_device_id domain) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); + uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); +} + +static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpetracks, "ComputeE Utilization", hostname, process_id, did, subDevice); +} + +static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, "CopyE Utilization", hostname, process_id, did, subDevice); } + static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, uint32_t domain, uint64_t timestamp, uint64_t frequency) { @@ -134,6 +146,34 @@ static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, track_event->set_name("Power"); track_event->set_counter_value(power); } +static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + perfetto_uuid_t track_uuid = get_computeEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); 
+ track_event->set_track_uuid(track_uuid); + track_event->set_name("computeEngine Usage"); + track_event->set_counter_value(activeTime); +} + +static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + perfetto_uuid_t track_uuid = get_copyEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); + track_event->set_track_uuid(track_uuid); + track_event->set_name("copyEngine Usage"); + track_event->set_counter_value(activeTime); +} + static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin, std::string name) { @@ -352,11 +392,28 @@ static void power_usr_callback(void *btx_handle, void *usr_data, const char *hos add_event_power(dispatch, hostname, vpid, vtid, did, domain, ts, power); } +static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { + auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data); + add_event_computeEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); +} + +static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { + auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data); + add_event_copyEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); +} + + void btx_register_usr_callbacks(void *btx_handle) { btx_register_callbacks_lttng_host(btx_handle, &host_usr_callback); btx_register_callbacks_lttng_device(btx_handle, 
&device_usr_callback); btx_register_callbacks_lttng_frequency(btx_handle, &frequency_usr_callback); btx_register_callbacks_lttng_power(btx_handle, &power_usr_callback); + btx_register_callbacks_lttng_computeEU(btx_handle, &computeEU_usr_callback); + btx_register_callbacks_lttng_copyEU(btx_handle, &copyEU_usr_callback); btx_register_callbacks_initialize_usr_data(btx_handle, &btx_initialize_usr_data); btx_register_callbacks_finalize_usr_data(btx_handle, &btx_finalize_usr_data); } diff --git a/xprof/interval.c.erb b/xprof/interval.c.erb index 8e09ef4d..9a57d52e 100644 --- a/xprof/interval.c.erb +++ b/xprof/interval.c.erb @@ -99,7 +99,8 @@ bt_component_class_initialize_method_status <%= namespace %>_dispatch_initialize dispatch->device_name_event_class = create_lttng_device_name_event_class_message(trace_class, stream_class); dispatch->frequency_event_class = create_lttng_frequency_event_class_message(trace_class, stream_class); dispatch->power_event_class = create_lttng_power_event_class_message(trace_class, stream_class); - + dispatch->computeEU_event_class = create_lttng_computeEU_event_class_message(trace_class, stream_class); + dispatch->copyEU_event_class = create_lttng_copyEU_event_class_message(trace_class, stream_class); /* Create a default trace from (instance of `trace_class`) */ bt_trace *trace = bt_trace_create(trace_class); diff --git a/xprof/interval.h.erb b/xprof/interval.h.erb index 2ebdb184..3a0e1d91 100644 --- a/xprof/interval.h.erb +++ b/xprof/interval.h.erb @@ -58,6 +58,8 @@ struct <%= namespace %>_dispatch { bt_event_class *device_name_event_class; bt_event_class *frequency_event_class; bt_event_class *power_event_class; + bt_event_class *computeEU_event_class; + bt_event_class *copyEU_event_class; /* Component's input port (weak) */ bt_self_component_port_input *in_port; }; diff --git a/xprof/interval_model.yaml b/xprof/interval_model.yaml index 0a9df04a..af2e6e45 100644 --- a/xprof/interval_model.yaml +++ b/xprof/interval_model.yaml @@ -78,3 +78,27 
@@ :field_value_range: 32 - :name: power :class: unsigned +- :name: lttng:computeEU + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: subDevice + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: activeTime + :class: unsigned +- :name: lttng:copyEU + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: subDevice + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: activeTime + :class: unsigned diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 583299f3..db605617 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -762,9 +762,23 @@ static int _sampling_initialized = 0; static ze_device_handle_t* _sampling_hDevices; static zes_freq_handle_t** _sampling_hFrequencies; static zes_pwr_handle_t** _sampling_hPowers; +static zes_engine_handle_t** _sampling_engineHandles; static uint32_t _sampling_deviceCount; +static uint32_t _sampling_subDeviceCount; static uint32_t* _sampling_freqDomainCounts; static uint32_t* _sampling_powerDomainCounts; +static uint32_t* _sampling_engineCounts; +static uint32_t* _sampling_engineCounts; + +typedef struct { + uint64_t timestamp; + uint64_t computeActive; +} computeEngineData; + +typedef struct { + uint64_t timestamp; + uint64_t copyActive; +} copyEngineData; int initializeHandles() { ze_result_t res; @@ -811,6 +825,8 @@ int initializeHandles() { _ZE_ERROR_MSG("2nd ZE_DRIVER_GET_PTR", res); return -1; } + //Get no sub-devices + zeDeviceGetSubDevices(_sampling_hDevices[0], &_sampling_subDeviceCount, NULL); _sampling_hFrequencies = (zes_freq_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_freq_handle_t*)); _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); @@ -818,6 +834,9 @@ int initializeHandles() { _sampling_hPowers = (zes_pwr_handle_t**) malloc(_sampling_deviceCount * 
sizeof(zes_pwr_handle_t*)); _sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + _sampling_engineHandles = (zes_engine_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_engine_handle_t*)); + _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device res = zesDeviceEnumFrequencyDomains(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); @@ -846,6 +865,19 @@ int initializeHandles() { printf("zesDeviceEnumPowerDomains failed for device %d: %d\n", i, res); return(-1); } + // Get the available engines for each device + res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { + printf("No engine groups found\n"); + return(-1); + } + _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); + res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); + if (res != ZE_RESULT_SUCCESS) { + printf("Failed to get engine group handles\n"); + free(_sampling_engineHandles); + return (-1); + } } free(hDriver); _sampling_initialized=1; @@ -874,10 +906,50 @@ void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_ } } +void readPerformance(uint32_t deviceIdx, computeEngineData *computeData, copyEngineData *copyData ){ + ze_result_t result; + for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { + computeData[i].computeActive = 0; + computeData[i].timestamp = 0; + copyData[i].copyActive = 0; + copyData[i].timestamp = 0; + } + for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { + zes_engine_properties_t engineProp = {}; + result = zesEngineGetProperties(_sampling_engineHandles[deviceIdx][j], &engineProp); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed 
to get engine properties\n"); + exit(-1); + } + if (engineProp.type == ZES_ENGINE_GROUP_COMPUTE_ALL){ + zes_engine_stats_t engineStats = {0}; + result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine activity data\n"); + exit(-1); + } + computeData[engineProp.subdeviceId].computeActive = engineStats.activeTime; + computeData[engineProp.subdeviceId].timestamp = engineStats.timestamp; + } + if (engineProp.type == ZES_ENGINE_GROUP_COPY_ALL){ + zes_engine_stats_t engineStats = {0}; + result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine activity data\n"); + exit(-1); + } + copyData[engineProp.subdeviceId].copyActive = engineStats.activeTime; + copyData[engineProp.subdeviceId].timestamp = engineStats.timestamp; + } + } +} + static void thapi_sampling_energy() { uint64_t ts_us; uint64_t energy_uj; uint32_t frequency; + computeEngineData computeE[_sampling_subDeviceCount]; + copyEngineData copyE[_sampling_subDeviceCount]; for (uint32_t i = 0; i < _sampling_deviceCount; i++) { for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { readFrequency(i, j, &frequency); @@ -887,6 +959,11 @@ static void thapi_sampling_energy() { readEnergy(i, j, &ts_us, &energy_uj); do_tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); } + readPerformance(i, computeE, copyE); + for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ + do_tracepoint(lttng_ust_ze_sampling, computeEngine , (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); + do_tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); + } } } diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index 4d914186..646a2dbb 100644 --- a/ze/ze_events.yaml 
+++ b/ze/ze_events.yaml @@ -1,10 +1,32 @@ --- lttng_ust_ze_sampling: events: + - name: copyEngine + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, subDevice ] + - [ uint64_t, activeTime ] + - [ uint64_t, timestamp ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, subDevice, "subDevice" ] + - [ ctf_integer, uint64_t, activeTime, "activeTime" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] + - name: computeEngine + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, subDevice ] + - [ uint64_t, activeTime ] + - [ uint64_t, timestamp ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, subDevice, "subDevice" ] + - [ ctf_integer, uint64_t, activeTime, "activeTime" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] - name: gpu_energy args: - [ ze_device_handle_t, hDevice ] - - [ uint32_t, domain] + - [ uint32_t, domain ] - [ uint64_t, energy ] - [ uint64_t, timestamp ] fields: @@ -15,8 +37,8 @@ lttng_ust_ze_sampling: - name: gpu_frequency args: - [ ze_device_handle_t, hDevice ] - - [ uint32_t, domain] - - [ uint64_t, timestamp ] + - [ uint32_t, domain ] + - [ uint64_t, timestamp ] - [ uint64_t, frequency ] fields: - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index e3199e1e..86ba6b9f 100644 --- a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -124,6 +124,52 @@ static void create_and_enqueue_host_message(const char* hostname, const process_ state->downstream_message_queue.push(message); } +static void create_and_enqueue_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) 
zeinterval_iter_g->callbacks_state; + auto [it, inserted] = state->device_computeEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); + // First entry + if (inserted) + return; + + auto &[prev_activeTime, prev_ts] = it->second; + + bt_message *message = create_computeEU_message(hostname, process_id, + thread_id, hDevice, subDevice, + static_cast(((activeTime - prev_activeTime) / static_cast(ts-prev_ts))*100000.0), + prev_ts, + zeinterval_iter_g->dispatch->computeEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + state->downstream_message_queue.push(message); + prev_activeTime = activeTime; + prev_ts = ts; +} + +static void create_and_enqueue_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; + auto [it, inserted] = state->device_copyEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); + // First entry + if (inserted) + return; + + auto &[prev_activeTime, prev_ts] = it->second; + + bt_message *message = create_copyEU_message(hostname, process_id, + thread_id, hDevice, subDevice, + static_cast(((activeTime-prev_activeTime) / static_cast(ts-prev_ts))*100000.0), + prev_ts, + zeinterval_iter_g->dispatch->copyEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + state->downstream_message_queue.push(message); + prev_activeTime = activeTime; + prev_ts = ts; +} + + + static void create_and_enqueue_device_message( const char* hostname, const process_id_t process_id, const thread_id_t thread_id, thapi_device_id device, const char* commandname, const char* metadata, @@ -293,6 +339,20 @@ static void zeinterval_<%= dbt_event.name %>_callback( int64_t 
ns_from_origin; bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); create_and_enqueue_frequency_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, domain, ns_from_origin, frequency); + <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:computeEngine" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_computeEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); + <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:copyEngine" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_copyEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); <% elsif dbt_event.name_unsanitized.start_with?('lttng_ust_ze:') or dbt_event.name_unsanitized.start_with?('lttng_ust_zet:') or dbt_event.name_unsanitized.start_with?('lttng_ust_zes:') or diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index d48d0bef..466f2f08 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -18,10 +18,13 @@ typedef std::tuple hp_comma typedef std::tuple hp_module_t; typedef hp_device_t hpd_t; typedef hp_dsd_t hpdd_t; +typedef hp_dsdev_t hpdsd_t; typedef hp_event_t hpe_t; typedef hp_kernel_t hpk_t; typedef std::tuple clock_lttng_device_t; typedef std::tuple energy_timestamp_t; +typedef std::tuple computeEngine_timestamp_t; +typedef std::tuple copyEngine_timestamp_t; typedef std::tuple t_tfnm_m_d_ts_cld_t; typedef std::tuple l_tfnm_m_d_ts_t; @@ -58,6 +61,11 @@ struct zeinterval_callbacks_state { std::unordered_map> 
last_command; /*Energy */ std::unordered_map device_energy_ref; + /*computeEngine */ + std::unordered_map device_computeEngine_ref; + /*copyEngine */ + std::unordered_map device_copyEngine_ref; + }; template Date: Sun, 24 Sep 2023 22:30:17 -0500 Subject: [PATCH 02/10] Update ze/tracer_ze_helpers.include.c Co-authored-by: Brice Videau --- ze/tracer_ze_helpers.include.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index db605617..abe5a0fd 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -961,7 +961,7 @@ static void thapi_sampling_energy() { } readPerformance(i, computeE, copyE); for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ - do_tracepoint(lttng_ust_ze_sampling, computeEngine , (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); + do_tracepoint(lttng_ust_ze_sampling, computeEngine, (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); do_tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); } } From 8b5c3b293df12a6b38ca4f29e6679fb1d19e1d94 Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Sun, 24 Sep 2023 22:37:21 -0500 Subject: [PATCH 03/10] Apply suggestions from code review Co-authored-by: Brice Videau --- utils/xprof_utils.cpp | 1 - utils/xprof_utils.hpp | 9 +++++---- xprof/btx_timeline.cpp | 21 ++++++++++----------- xprof/interval.c.erb | 1 + ze/tracer_ze_helpers.include.c | 10 +++++----- ze/zeinterval_callbacks.cpp.erb | 9 +++------ 6 files changed, 24 insertions(+), 27 deletions(-) diff --git a/utils/xprof_utils.cpp b/utils/xprof_utils.cpp index 34e011b1..b50f93e4 100644 --- a/utils/xprof_utils.cpp +++ b/utils/xprof_utils.cpp @@ -214,7 +214,6 @@ bt_message* create_copyEU_message(const char* hostname, const process_id_t proce return message; } - bt_message* 
create_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index cf13da4a..c4996929 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -48,6 +48,7 @@ typedef std::string thapi_function_name; typedef uintptr_t thapi_device_id; typedef uint32_t thapi_domain_id; typedef uint32_t thapi_sdevice_id; + // Represent a device and a sub device typedef std::tuple dsd_t; typedef std::tuple hp_t; @@ -122,12 +123,12 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message* create_computeEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, - bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message* create_copyEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, - bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, 
bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t, const char *name, const uint64_t ts, const uint64_t duration, diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 4dc15661..d06d33c5 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -32,6 +32,7 @@ struct timeline_dispatch_s { std::unordered_map hp_ddomain2pwrtracks; std::unordered_map hp_dsdev2cpetracks; std::unordered_map hp_dsdev2cpytracks; + perfetto_pruned::Trace trace; }; using timeline_dispatch_t = struct timeline_dispatch_s; @@ -117,7 +118,6 @@ static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, std return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, "CopyE Utilization", hostname, process_id, did, subDevice); } - static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, uint32_t domain, uint64_t timestamp, uint64_t frequency) { @@ -146,9 +146,10 @@ static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, track_event->set_name("Power"); track_event->set_counter_value(power); } + static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, uint64_t thread_id, uintptr_t did, - uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { perfetto_uuid_t track_uuid = get_computeEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); @@ -161,8 +162,8 @@ static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostn } static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t 
process_id, uint64_t thread_id, uintptr_t did, - uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { perfetto_uuid_t track_uuid = get_copyEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); @@ -174,7 +175,6 @@ static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname track_event->set_counter_value(activeTime); } - static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin, std::string name) { auto *packet = dispatch->trace.add_packet(); @@ -393,20 +393,19 @@ static void power_usr_callback(void *btx_handle, void *usr_data, const char *hos } static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, - int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint32_t subDevice, uint64_t activeTime) { + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { auto *dispatch = static_cast(usr_data); add_event_computeEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); } static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, - int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint32_t subDevice, uint64_t activeTime) { + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { auto *dispatch = static_cast(usr_data); add_event_copyEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); } - void btx_register_usr_callbacks(void *btx_handle) { btx_register_callbacks_lttng_host(btx_handle, &host_usr_callback); btx_register_callbacks_lttng_device(btx_handle, &device_usr_callback); diff --git a/xprof/interval.c.erb 
b/xprof/interval.c.erb index 9a57d52e..6e39ca27 100644 --- a/xprof/interval.c.erb +++ b/xprof/interval.c.erb @@ -101,6 +101,7 @@ bt_component_class_initialize_method_status <%= namespace %>_dispatch_initialize dispatch->power_event_class = create_lttng_power_event_class_message(trace_class, stream_class); dispatch->computeEU_event_class = create_lttng_computeEU_event_class_message(trace_class, stream_class); dispatch->copyEU_event_class = create_lttng_copyEU_event_class_message(trace_class, stream_class); + /* Create a default trace from (instance of `trace_class`) */ bt_trace *trace = bt_trace_create(trace_class); diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index abe5a0fd..9efa452e 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -835,7 +835,7 @@ int initializeHandles() { _sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); _sampling_engineHandles = (zes_engine_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_engine_handle_t*)); - _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device @@ -874,10 +874,10 @@ int initializeHandles() { _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); if (res != ZE_RESULT_SUCCESS) { - printf("Failed to get engine group handles\n"); - free(_sampling_engineHandles); - return (-1); - } + printf("Failed to get engine group handles\n"); + free(_sampling_engineHandles); + return (-1); + } } free(hDriver); _sampling_initialized=1; diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index 86ba6b9f..536f903b 100644 --- 
a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -136,7 +136,7 @@ static void create_and_enqueue_computeEU_message(const char* hostname, const pro bt_message *message = create_computeEU_message(hostname, process_id, thread_id, hDevice, subDevice, - static_cast(((activeTime - prev_activeTime) / static_cast(ts-prev_ts))*100000.0), + static_cast(((activeTime - prev_activeTime) / static_cast(ts-prev_ts)) * 100000.0), prev_ts, zeinterval_iter_g->dispatch->computeEU_event_class, zeinterval_self_message_iterator_g, @@ -167,9 +167,6 @@ static void create_and_enqueue_copyEU_message(const char* hostname, const proces prev_activeTime = activeTime; prev_ts = ts; } - - - static void create_and_enqueue_device_message( const char* hostname, const process_id_t process_id, const thread_id_t thread_id, thapi_device_id device, const char* commandname, const char* metadata, @@ -342,14 +339,14 @@ static void zeinterval_<%= dbt_event.name %>_callback( <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:computeEngine" %> const hostname_t hostname = borrow_hostname(bt_evt); const process_id_t process_id = 0; - const thread_id_t thread_id = 0; + const thread_id_t thread_id = 0; int64_t ns_from_origin; bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); create_and_enqueue_computeEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:copyEngine" %> const hostname_t hostname = borrow_hostname(bt_evt); const process_id_t process_id = 0; - const thread_id_t thread_id = 0; + const thread_id_t thread_id = 0; int64_t ns_from_origin; bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); create_and_enqueue_copyEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); From 9c15987f6c0c333c371e0a32e63d23ed95923584 Mon Sep 17 00:00:00 2001 From: sbekele Date: Mon, 25 Sep 
2023 20:49:48 +0000 Subject: [PATCH 04/10] sysman-metrics updated --- utils/xprof_utils.cpp | 8 ++-- utils/xprof_utils.hpp | 4 +- xprof/btx_interval_model.yaml | 10 ++-- xprof/btx_timeline.cpp | 12 ++--- ze/tracer_ze_helpers.include.c | 15 +++--- ze/zeinterval_callbacks.cpp.erb | 82 ++++++++++++++++++--------------- ze/zeinterval_callbacks.hpp | 1 - 7 files changed, 68 insertions(+), 64 deletions(-) diff --git a/utils/xprof_utils.cpp b/utils/xprof_utils.cpp index b50f93e4..35f5229c 100644 --- a/utils/xprof_utils.cpp +++ b/utils/xprof_utils.cpp @@ -119,7 +119,7 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr } bt_message* create_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + const uintptr_t hDevice, const uint32_t subDevice, const float activeTime, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { /* Message creation */ @@ -161,13 +161,13 @@ bt_message* create_computeEU_message(const char* hostname, const process_id_t pr //activeTime bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); - bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + bt_field_real_single_precision_set_value(activeTime_field, activeTime); return message; } bt_message* create_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + const uintptr_t hDevice, const uint32_t subDevice, const float activeTime, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { /* Message creation */ @@ -209,7 +209,7 @@ bt_message* create_copyEU_message(const char* hostname, 
const process_id_t proce //activeTime bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); - bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + bt_field_real_single_precision_set_value(activeTime_field, activeTime); return message; } diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index c4996929..c04eed2f 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -123,11 +123,11 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message* create_computeEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + const uintptr_t hDevice, const uint32_t subDevice, const float activeTime, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message* create_copyEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + const uintptr_t hDevice, const uint32_t subDevice, const float activeTime, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t, diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index 78785670..d23459f1 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml @@ -143,9 +143,8 @@ :cast_type: uint32_t - :name: activeTime :field_class: - :type: integer_unsigned - :field_value_range: 64 - :cast_type: uint64_t + :type: 
single + :cast_type: float - :name: lttng:copyEU :payload_field_class: :type: structure @@ -162,6 +161,5 @@ :cast_type: uint32_t - :name: activeTime :field_class: - :type: integer_unsigned - :field_value_range: 64 - :cast_type: uint64_t + :type: single + :cast_type: float diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index d06d33c5..66987599 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -149,7 +149,7 @@ static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, - uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + uint32_t subDevice, uint64_t timestamp, float activeTime) { perfetto_uuid_t track_uuid = get_computeEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); @@ -158,12 +158,12 @@ static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostn track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); track_event->set_track_uuid(track_uuid); track_event->set_name("computeEngine Usage"); - track_event->set_counter_value(activeTime); + track_event->set_double_counter_value(activeTime); } static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, - uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + uint32_t subDevice, uint64_t timestamp, float activeTime) { perfetto_uuid_t track_uuid = get_copyEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); @@ -172,7 +172,7 @@ static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); 
track_event->set_track_uuid(track_uuid); track_event->set_name("copyEngine Usage"); - track_event->set_counter_value(activeTime); + track_event->set_double_counter_value(activeTime); } static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin, @@ -394,14 +394,14 @@ static void power_usr_callback(void *btx_handle, void *usr_data, const char *hos static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint32_t subDevice, uint64_t activeTime) { + uint64_t did, uint32_t subDevice, float activeTime) { auto *dispatch = static_cast(usr_data); add_event_computeEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); } static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, - uint64_t did, uint32_t subDevice, uint64_t activeTime) { + uint64_t did, uint32_t subDevice, float activeTime) { auto *dispatch = static_cast(usr_data); add_event_copyEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); } diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 9efa452e..a4a42125 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -768,7 +768,6 @@ static uint32_t _sampling_subDeviceCount; static uint32_t* _sampling_freqDomainCounts; static uint32_t* _sampling_powerDomainCounts; static uint32_t* _sampling_engineCounts; -static uint32_t* _sampling_engineCounts; typedef struct { uint64_t timestamp; @@ -797,7 +796,7 @@ int initializeHandles() { #endif // Query driver - uint32_t driverCount; + uint32_t driverCount = 0; res = ZE_DRIVER_GET_PTR(&driverCount, NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZE_DRIVER_GET_PTR", res); @@ -851,7 +850,6 @@ int initializeHandles() { printf("zesDeviceEnumFrequencyDomains failed for device %d: %d\n", i, res); return(-1); } 
- // Get power domains for each device res = zesDeviceEnumPowerDomains(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { @@ -871,6 +869,7 @@ int initializeHandles() { printf("No engine groups found\n"); return(-1); } + _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); if (res != ZE_RESULT_SUCCESS) { @@ -889,7 +888,6 @@ void readFrequency(uint32_t deviceIdx, uint32_t domainIdx, uint32_t *frequency) *frequency=0; zes_freq_state_t freqState; if (zesFrequencyGetState(_sampling_hFrequencies[deviceIdx][domainIdx], &freqState) == ZE_RESULT_SUCCESS) { - // printf("Device %d - Frequency Domain %d: Current frequency: %lf MHz\n", deviceIdx, domainIdx, freqState.actual); *frequency = freqState.actual; } } @@ -900,7 +898,6 @@ void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_ *energy_uj = 0; zes_power_energy_counter_t energyCounter; if (zesPowerGetEnergyCounter(_sampling_hPowers[deviceIdx][domainIdx], &energyCounter) == ZE_RESULT_SUCCESS) { - // printf("Device %d - Power Domain %d: Total energy consumption: %lu Joules\n", deviceIdx, domainIdx, energyCounter.energy); *ts_us = energyCounter.timestamp; *energy_uj = energyCounter.energy; } @@ -953,16 +950,16 @@ static void thapi_sampling_energy() { for (uint32_t i = 0; i < _sampling_deviceCount; i++) { for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { readFrequency(i, j, &frequency); - do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); + tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); } for (uint32_t j = 0; j < _sampling_powerDomainCounts[i]; j++) { readEnergy(i, j, &ts_us, &energy_uj); - do_tracepoint(lttng_ust_ze_sampling, gpu_energy, 
(ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); + tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); } readPerformance(i, computeE, copyE); for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ - do_tracepoint(lttng_ust_ze_sampling, computeEngine, (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); - do_tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); + tracepoint(lttng_ust_ze_sampling, computeEngine, (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); + tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); } } } diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index 536f903b..5fd5a6a5 100644 --- a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -53,6 +53,7 @@ static uint64_t convert_device_cycle(uint64_t device_cycle, } static uint64_t compute_and_convert_delta(uint64_t start, uint64_t end, const ze_device_properties_t &device_property) { + assert (device_property.kernelTimestampValidBits <= 64); const uint64_t max_val = ((uint64_t)1 << device_property.kernelTimestampValidBits) - 1; start &= max_val; @@ -78,8 +79,10 @@ void *init_zeinterval_callbacks_state() { return (void*) s; } -static void create_and_enqueue_power_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t domain, const uint64_t energy, const uint64_t ts) { +static void create_and_enqueue_power_message(const char* hostname, const process_id_t process_id, + const thread_id_t thread_id, const uintptr_t hDevice, + const uint32_t domain, const uint64_t energy, const uint64_t ts) { + zeinterval_callbacks_state* state = 
(zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; auto [it, inserted] = state->device_energy_ref.insert({{hostname, process_id, hDevice, domain}, {energy, ts}}); // First entry @@ -89,84 +92,91 @@ static void create_and_enqueue_power_message(const char* hostname, const process auto &[prev_energy, prev_ts] = it->second; bt_message *message = create_power_message(hostname, process_id, - thread_id, hDevice, domain, - static_cast(((energy-prev_energy) / static_cast(ts-prev_ts))*1000.0), - prev_ts, - zeinterval_iter_g->dispatch->power_event_class, - zeinterval_self_message_iterator_g, - zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + thread_id, hDevice, domain, + static_cast(((energy-prev_energy) / static_cast(ts-prev_ts)) * 1000.0), + prev_ts, + zeinterval_iter_g->dispatch->power_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); state->downstream_message_queue.push(message); prev_energy = energy; prev_ts = ts; } -static void create_and_enqueue_frequency_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency) { - bt_message *message = create_frequency_message(hostname, process_id, thread_id, hDevice, domain, ts, frequency, - zeinterval_iter_g->dispatch->frequency_event_class, - zeinterval_self_message_iterator_g, - zeinterval_iter_g->dispatch->stream, BACKEND_ZE); +static void create_and_enqueue_frequency_message(const char* hostname, const process_id_t process_id, + const thread_id_t thread_id, const uintptr_t hDevice, + const uint32_t domain, const uint64_t ts, const uint64_t frequency) { + + bt_message *message = create_frequency_message(hostname, process_id, thread_id, hDevice, domain, ts, frequency, + zeinterval_iter_g->dispatch->frequency_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); 
zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; state->downstream_message_queue.push(message); } -static void create_and_enqueue_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, - const uint64_t ts, const uint64_t duration, const bool err) { +static void create_and_enqueue_host_message(const char* hostname, const process_id_t process_id, + const thread_id_t thread_id, const char* name, const uint64_t ts, + const uint64_t duration, const bool err) { /* Message creation */ bt_message *message = create_host_message(hostname, process_id, thread_id, name, ts, duration, err, - zeinterval_iter_g->dispatch->host_event_class, - zeinterval_self_message_iterator_g, - zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + zeinterval_iter_g->dispatch->host_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; state->downstream_message_queue.push(message); } -static void create_and_enqueue_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { +static void create_and_enqueue_computeEU_message(const char* hostname, const process_id_t process_id, + const thread_id_t thread_id, const uintptr_t hDevice, + const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; auto [it, inserted] = state->device_computeEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); // First entry if (inserted) - return; + return; auto &[prev_activeTime, prev_ts] = it->second; bt_message *message = create_computeEU_message(hostname, process_id, - thread_id, 
hDevice, subDevice, - static_cast(((activeTime - prev_activeTime) / static_cast(ts-prev_ts)) * 100000.0), - prev_ts, - zeinterval_iter_g->dispatch->computeEU_event_class, - zeinterval_self_message_iterator_g, - zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + thread_id, hDevice, subDevice, + ((activeTime - prev_activeTime) / static_cast(ts-prev_ts)) * 1000.0, + prev_ts, + zeinterval_iter_g->dispatch->computeEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); state->downstream_message_queue.push(message); prev_activeTime = activeTime; prev_ts = ts; } static void create_and_enqueue_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, - const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; auto [it, inserted] = state->device_copyEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); // First entry if (inserted) - return; + return; auto &[prev_activeTime, prev_ts] = it->second; bt_message *message = create_copyEU_message(hostname, process_id, - thread_id, hDevice, subDevice, - static_cast(((activeTime-prev_activeTime) / static_cast(ts-prev_ts))*100000.0), - prev_ts, - zeinterval_iter_g->dispatch->copyEU_event_class, - zeinterval_self_message_iterator_g, - zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + thread_id, hDevice, subDevice, + ((activeTime-prev_activeTime) / static_cast(ts-prev_ts)) * 1000.0, + prev_ts, + zeinterval_iter_g->dispatch->copyEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); state->downstream_message_queue.push(message); prev_activeTime = activeTime; prev_ts = ts; } + static void create_and_enqueue_device_message( 
const char* hostname, const process_id_t process_id, const thread_id_t thread_id, thapi_device_id device, const char* commandname, const char* metadata, diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index 466f2f08..3297cdd8 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -65,7 +65,6 @@ struct zeinterval_callbacks_state { std::unordered_map device_computeEngine_ref; /*copyEngine */ std::unordered_map device_copyEngine_ref; - }; template Date: Fri, 29 Sep 2023 10:45:32 -0500 Subject: [PATCH 05/10] Update ze/tracer_ze_helpers.include.c --- ze/tracer_ze_helpers.include.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index a4a42125..e203b696 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -763,8 +763,8 @@ static ze_device_handle_t* _sampling_hDevices; static zes_freq_handle_t** _sampling_hFrequencies; static zes_pwr_handle_t** _sampling_hPowers; static zes_engine_handle_t** _sampling_engineHandles; -static uint32_t _sampling_deviceCount; -static uint32_t _sampling_subDeviceCount; +static uint32_t _sampling_deviceCount = 0; +static uint32_t _sampling_subDeviceCount = 0; static uint32_t* _sampling_freqDomainCounts; static uint32_t* _sampling_powerDomainCounts; static uint32_t* _sampling_engineCounts; From 58dd9e8d176c2ef23e3cb8ea8acbb181d81a8456 Mon Sep 17 00:00:00 2001 From: sbekele Date: Fri, 29 Sep 2023 19:58:59 +0000 Subject: [PATCH 06/10] tracepoint_enabled check added --- ze/tracer_ze_helpers.include.c | 51 +++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index e203b696..a77e39bc 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -903,13 +903,11 @@ void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_ } } -void 
readPerformance(uint32_t deviceIdx, computeEngineData *computeData, copyEngineData *copyData ){ +void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ ze_result_t result; for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { computeData[i].computeActive = 0; computeData[i].timestamp = 0; - copyData[i].copyActive = 0; - copyData[i].timestamp = 0; } for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { zes_engine_properties_t engineProp = {}; @@ -928,6 +926,22 @@ void readPerformance(uint32_t deviceIdx, computeEngineData *computeData, copyEng computeData[engineProp.subdeviceId].computeActive = engineStats.activeTime; computeData[engineProp.subdeviceId].timestamp = engineStats.timestamp; } + } +} + +void readCopyE(uint32_t deviceIdx, copyEngineData *copyData ){ + ze_result_t result; + for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { + copyData[i].copyActive = 0; + copyData[i].timestamp = 0; + } + for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { + zes_engine_properties_t engineProp = {}; + result = zesEngineGetProperties(_sampling_engineHandles[deviceIdx][j], &engineProp); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine properties\n"); + exit(-1); + } if (engineProp.type == ZES_ENGINE_GROUP_COPY_ALL){ zes_engine_stats_t engineStats = {0}; result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); @@ -948,18 +962,29 @@ static void thapi_sampling_energy() { computeEngineData computeE[_sampling_subDeviceCount]; copyEngineData copyE[_sampling_subDeviceCount]; for (uint32_t i = 0; i < _sampling_deviceCount; i++) { - for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { - readFrequency(i, j, &frequency); - tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)){ + for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { + 
readFrequency(i, j, &frequency); + do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); + } } - for (uint32_t j = 0; j < _sampling_powerDomainCounts[i]; j++) { - readEnergy(i, j, &ts_us, &energy_uj); - tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); + if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)){ + for (uint32_t j = 0; j < _sampling_powerDomainCounts[i]; j++) { + readEnergy(i, j, &ts_us, &energy_uj); + do_tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); + } } - readPerformance(i, computeE, copyE); - for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ - tracepoint(lttng_ust_ze_sampling, computeEngine, (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); - tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); + if (tracepoint_enabled(lttng_ust_ze_sampling, computeEngine)){ + readComputeE(i, computeE); + for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ + do_tracepoint(lttng_ust_ze_sampling, computeEngine, (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); + } + } + if (tracepoint_enabled(lttng_ust_ze_sampling, copyEngine)){ + readCopyE(i, copyE); + for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ + do_tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); + } } } } From 759e669c98a064e7db96bfff2426a27a66fd9a66 Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Fri, 29 Sep 2023 15:36:34 -0500 Subject: [PATCH 07/10] Apply suggestions from code review Co-authored-by: Brice Videau --- xprof/btx_timeline.cpp | 4 ++-- ze/tracer_ze_helpers.include.c | 3 +++ 2 files changed, 5 insertions(+), 2 
deletions(-) diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 66987599..df6f9fe2 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -109,12 +109,12 @@ static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std: } static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpetracks, "ComputeE Utilization", hostname, process_id, did, subDevice); } static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, "CopyE Utilization", hostname, process_id, did, subDevice); } diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index a77e39bc..e0c19a62 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -838,6 +838,7 @@ int initializeHandles() { for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device + _sampling_freqDomainCounts[i] = 0; res = zesDeviceEnumFrequencyDomains(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { printf("zesDeviceEnumFrequencyDomains (count query) failed for device %d: %d\n", i, res); @@ -851,6 +852,7 @@ int initializeHandles() { return(-1); } // Get power domains for each device + _sampling_powerDomainCounts[i] = 0; res = zesDeviceEnumPowerDomains(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { printf("zesDeviceEnumPowerDomains (count query) failed for device %d: %d\n", i, res); @@ -864,6 +866,7 @@ int 
initializeHandles() { return(-1); } // Get the available engines for each device + _sampling_engineCounts[i] = 0; res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { printf("No engine groups found\n"); From bff3be7d41f0b555f148918ccedd53315aa3e30a Mon Sep 17 00:00:00 2001 From: sbekele Date: Sat, 30 Sep 2023 05:05:42 +0000 Subject: [PATCH 08/10] Suggestions incorporated --- xprof/btx_timeline.cpp | 1 + ze/tracer_ze_helpers.include.c | 85 ++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index df6f9fe2..264bbd3f 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -105,6 +105,7 @@ static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, } static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { + //Extra leading space in the name field to make GPU Power the first track return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); } diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index e0c19a62..3dd7d0b1 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -814,18 +814,22 @@ int initializeHandles() { res = ZE_DEVICE_GET_PTR(hDriver[0], &_sampling_deviceCount, NULL); if (res != ZE_RESULT_SUCCESS || _sampling_deviceCount == 0) { fprintf(stderr, "ERROR: No device found!\n"); - _ZE_ERROR_MSG("ZE_DEVICE_GET_PTR", res); + _ZE_ERROR_MSG("1st ZE_DEVICE_GET_PTR", res); return -1; } _sampling_hDevices = (ze_device_handle_t*) malloc(_sampling_deviceCount * sizeof(ze_device_handle_t)); res = ZE_DEVICE_GET_PTR(hDriver[0], &_sampling_deviceCount, _sampling_hDevices); if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd 
ZE_DRIVER_GET_PTR", res); + _ZE_ERROR_MSG("2nd ZE_DEVICE_GET_PTR", res); return -1; } //Get no sub-devices - zeDeviceGetSubDevices(_sampling_hDevices[0], &_sampling_subDeviceCount, NULL); + res = ZE_DEVICE_GET_SUB_DEVICES_PTR(_sampling_hDevices[0], &_sampling_subDeviceCount, NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); + return -1; + } _sampling_hFrequencies = (zes_freq_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_freq_handle_t*)); _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); @@ -839,46 +843,46 @@ int initializeHandles() { for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device _sampling_freqDomainCounts[i] = 0; - res = zesDeviceEnumFrequencyDomains(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { - printf("zesDeviceEnumFrequencyDomains (count query) failed for device %d: %d\n", i, res); - return(-1); + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + return -1; } _sampling_hFrequencies[i] = (zes_freq_handle_t*) malloc(_sampling_freqDomainCounts[i] * sizeof(zes_freq_handle_t)); - res = zesDeviceEnumFrequencyDomains(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], _sampling_hFrequencies[i]); + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], _sampling_hFrequencies[i]); if (res != ZE_RESULT_SUCCESS) { - printf("zesDeviceEnumFrequencyDomains failed for device %d: %d\n", i, res); - return(-1); + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + return -1; } // Get power domains for each device _sampling_powerDomainCounts[i] = 0; - res = zesDeviceEnumPowerDomains(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); + res = 
ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { - printf("zesDeviceEnumPowerDomains (count query) failed for device %d: %d\n", i, res); - return(-1); + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + return -1; } _sampling_hPowers[i] = (zes_pwr_handle_t*) malloc(_sampling_powerDomainCounts[i] * sizeof(zes_pwr_handle_t)); - res = zesDeviceEnumPowerDomains(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], _sampling_hPowers[i]); + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], _sampling_hPowers[i]); if (res != ZE_RESULT_SUCCESS) { - printf("zesDeviceEnumPowerDomains failed for device %d: %d\n", i, res); - return(-1); + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + return -1; } // Get the available engines for each device _sampling_engineCounts[i] = 0; - res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { - printf("No engine groups found\n"); - return(-1); + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + return -1; } _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); - res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); if (res != ZE_RESULT_SUCCESS) { - printf("Failed to get engine group handles\n"); + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); free(_sampling_engineHandles); - return (-1); + return -1; } } free(hDriver); @@ -888,22 +892,29 @@ int initializeHandles() { void readFrequency(uint32_t deviceIdx, uint32_t domainIdx, 
uint32_t *frequency) { if (!_sampling_initialized) return; + ze_result_t result; *frequency=0; zes_freq_state_t freqState; - if (zesFrequencyGetState(_sampling_hFrequencies[deviceIdx][domainIdx], &freqState) == ZE_RESULT_SUCCESS) { - *frequency = freqState.actual; + result = ZES_FREQUENCY_GET_STATE_PTR(_sampling_hFrequencies[deviceIdx][domainIdx], &freqState); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_FREQUENCY_GET_STATE_PTR", result); } + *frequency = freqState.actual; } void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_t *energy_uj) { if (!_sampling_initialized) return; + ze_result_t result; *ts_us = 0; *energy_uj = 0; zes_power_energy_counter_t energyCounter; - if (zesPowerGetEnergyCounter(_sampling_hPowers[deviceIdx][domainIdx], &energyCounter) == ZE_RESULT_SUCCESS) { - *ts_us = energyCounter.timestamp; - *energy_uj = energyCounter.energy; + result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[deviceIdx][domainIdx], &energyCounter); + if (result != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); + exit(-1); } + *ts_us = energyCounter.timestamp; + *energy_uj = energyCounter.energy; } void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ @@ -913,17 +924,18 @@ void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ computeData[i].timestamp = 0; } for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { - zes_engine_properties_t engineProp = {}; - result = zesEngineGetProperties(_sampling_engineHandles[deviceIdx][j], &engineProp); + zes_engine_properties_t engineProp = {0}; + engineProp.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; + result = ZES_ENGINE_GET_PROPERTIES_PTR(_sampling_engineHandles[deviceIdx][j], &engineProp); if (result != ZE_RESULT_SUCCESS) { - printf("Failed to get engine properties\n"); + _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", result); exit(-1); } if (engineProp.type == ZES_ENGINE_GROUP_COMPUTE_ALL){ 
zes_engine_stats_t engineStats = {0}; - result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[deviceIdx][j], &engineStats); if (result != ZE_RESULT_SUCCESS) { - printf("Failed to get engine activity data\n"); + _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); exit(-1); } computeData[engineProp.subdeviceId].computeActive = engineStats.activeTime; @@ -939,17 +951,18 @@ void readCopyE(uint32_t deviceIdx, copyEngineData *copyData ){ copyData[i].timestamp = 0; } for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { - zes_engine_properties_t engineProp = {}; - result = zesEngineGetProperties(_sampling_engineHandles[deviceIdx][j], &engineProp); + zes_engine_properties_t engineProp = {0}; + engineProp.stype = ZES_STRUCTURE_TYPE_ENGINE_PROPERTIES; + result = ZES_ENGINE_GET_PROPERTIES_PTR(_sampling_engineHandles[deviceIdx][j], &engineProp); if (result != ZE_RESULT_SUCCESS) { - printf("Failed to get engine properties\n"); + _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", result); exit(-1); } if (engineProp.type == ZES_ENGINE_GROUP_COPY_ALL){ zes_engine_stats_t engineStats = {0}; - result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[deviceIdx][j], &engineStats); if (result != ZE_RESULT_SUCCESS) { - printf("Failed to get engine activity data\n"); + _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); exit(-1); } copyData[engineProp.subdeviceId].copyActive = engineStats.activeTime; From 4b8fdab586d9099e54ac42a24fe8bcbbd5234bab Mon Sep 17 00:00:00 2001 From: sbekele Date: Fri, 6 Oct 2023 15:23:00 +0000 Subject: [PATCH 09/10] Separation of initialization --- ze/tracer_ze_helpers.include.c | 186 +++++++++++++++++++-------------- 1 file changed, 110 insertions(+), 76 deletions(-) diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 3dd7d0b1..90d8f6ad 
100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -753,21 +753,29 @@ static inline void _dump_memory_info(ze_command_list_handle_t hCommandList, cons } //////////////////////////////////////////// -#define _ZE_ERROR_MSG(NAME,RES) {fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));} -#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) {fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));} -#define _ERROR_MSG(MSG) {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);} +#define _ZE_ERROR_MSG(NAME,RES) do {\ + fprintf(stderr,"%s() failed at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ZE_ERROR_MSG_NOTERMINATE(NAME,RES) do {\ + fprintf(stderr,"%s() error at %d(%s): res=%x\n",(NAME),__LINE__,__FILE__,(RES));\ +} while (0) +#define _ERROR_MSG(MSG) {perror((MSG)) do {\ + {perror((MSG)); fprintf(stderr,"errno=%d at %d(%s)",errno,__LINE__,__FILE__);\ +} while (0) -static int _sampling_initialized = 0; +static int _sampling_freq_initialized = 0; +static int _sampling_pwr_initialized = 0; +static int _sampling_engines_initialized = 0; // Static handles to stay throughout the execution -static ze_device_handle_t* _sampling_hDevices; -static zes_freq_handle_t** _sampling_hFrequencies; -static zes_pwr_handle_t** _sampling_hPowers; -static zes_engine_handle_t** _sampling_engineHandles; +static ze_device_handle_t* _sampling_hDevices = NULL; +static zes_freq_handle_t** _sampling_hFrequencies = NULL; +static zes_pwr_handle_t** _sampling_hPowers = NULL; +static zes_engine_handle_t** _sampling_engineHandles = NULL; static uint32_t _sampling_deviceCount = 0; static uint32_t _sampling_subDeviceCount = 0; -static uint32_t* _sampling_freqDomainCounts; -static uint32_t* _sampling_powerDomainCounts; -static uint32_t* _sampling_engineCounts; +static uint32_t* _sampling_freqDomainCounts = NULL; +static uint32_t* _sampling_powerDomainCounts = NULL; +static 
uint32_t* _sampling_engineCounts = NULL; typedef struct { uint64_t timestamp; @@ -779,6 +787,83 @@ typedef struct { uint64_t copyActive; } copyEngineData; +void intializeFrequency() { + ze_result_t res; + _sampling_hFrequencies = (zes_freq_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_freq_handle_t*)); + _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + for (uint32_t i = 0; i < _sampling_deviceCount; i++) { + // Get frequency domains for each device + _sampling_freqDomainCounts[i] = 0; + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[i] = 0; + return; + } + _sampling_hFrequencies[i] = (zes_freq_handle_t*) malloc(_sampling_freqDomainCounts[i] * sizeof(zes_freq_handle_t)); + res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], _sampling_hFrequencies[i]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); + _sampling_freqDomainCounts[i] = 0; + free(_sampling_hFrequencies[i]); + return; + } + } + _sampling_freq_initialized = 1; +} + +void intializePower() { + ze_result_t res; + _sampling_hPowers = (zes_pwr_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_pwr_handle_t*)); + _sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + for (uint32_t i = 0; i < _sampling_deviceCount; i++) { + // Get power domains for each device + _sampling_powerDomainCounts[i] = 0; + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[i] = 0; + return; + } + + _sampling_hPowers[i] = (zes_pwr_handle_t*) malloc(_sampling_powerDomainCounts[i] * 
sizeof(zes_pwr_handle_t)); + res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], _sampling_hPowers[i]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); + _sampling_powerDomainCounts[i] = 0; + free(_sampling_hPowers[i]); + return; + } + } + _sampling_pwr_initialized = 1; +} + +void intializeEngines() { + ze_result_t res; + _sampling_engineHandles = (zes_engine_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_engine_handle_t*)); + _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + for (uint32_t i = 0; i < _sampling_deviceCount; i++) { + // Get engine counts for each device + _sampling_engineCounts[i] = 0; + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { + _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[i] = 0; + return; + } + + _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); + res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); + if (res != ZE_RESULT_SUCCESS) { + _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); + _sampling_engineCounts[i] = 0; + free(_sampling_engineHandles[i]); + return; + } + } + _sampling_engines_initialized = 1; +} + int initializeHandles() { ze_result_t res; @@ -803,7 +888,7 @@ int initializeHandles() { return -1; } - ze_driver_handle_t *hDriver = (ze_driver_handle_t*) malloc(driverCount * sizeof(ze_driver_handle_t)); + ze_driver_handle_t *hDriver = (ze_driver_handle_t*) alloca(driverCount * sizeof(ze_driver_handle_t)); res = ZE_DRIVER_GET_PTR(&driverCount, hDriver); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("2nd ZE_DRIVER_GET_PTR", res); @@ -822,6 +907,7 @@ int initializeHandles() { res = 
ZE_DEVICE_GET_PTR(hDriver[0], &_sampling_deviceCount, _sampling_hDevices); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("2nd ZE_DEVICE_GET_PTR", res); + free(_sampling_hDevices); return -1; } //Get no sub-devices @@ -830,68 +916,14 @@ int initializeHandles() { _ZE_ERROR_MSG("ZE_DEVICE_GET_SUB_DEVICES_PTR", res); return -1; } - - _sampling_hFrequencies = (zes_freq_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_freq_handle_t*)); - _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); - - _sampling_hPowers = (zes_pwr_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_pwr_handle_t*)); - _sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); - - _sampling_engineHandles = (zes_engine_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_engine_handle_t*)); - _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); - - for (uint32_t i = 0; i < _sampling_deviceCount; i++) { - // Get frequency domains for each device - _sampling_freqDomainCounts[i] = 0; - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - return -1; - } - - _sampling_hFrequencies[i] = (zes_freq_handle_t*) malloc(_sampling_freqDomainCounts[i] * sizeof(zes_freq_handle_t)); - res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], _sampling_hFrequencies[i]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); - return -1; - } - // Get power domains for each device - _sampling_powerDomainCounts[i] = 0; - res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - return -1; - } - - _sampling_hPowers[i] = 
(zes_pwr_handle_t*) malloc(_sampling_powerDomainCounts[i] * sizeof(zes_pwr_handle_t)); - res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], _sampling_hPowers[i]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); - return -1; - } - // Get the available engines for each device - _sampling_engineCounts[i] = 0; - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); - if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { - _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - return -1; - } - - _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); - res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); - if (res != ZE_RESULT_SUCCESS) { - _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); - free(_sampling_engineHandles); - return -1; - } - } - free(hDriver); - _sampling_initialized=1; + intializeFrequency(); + intializePower(); + intializeEngines(); return 0; } void readFrequency(uint32_t deviceIdx, uint32_t domainIdx, uint32_t *frequency) { - if (!_sampling_initialized) return; + if (!_sampling_freq_initialized) return; ze_result_t result; *frequency=0; zes_freq_state_t freqState; @@ -903,7 +935,7 @@ void readFrequency(uint32_t deviceIdx, uint32_t domainIdx, uint32_t *frequency) } void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_t *energy_uj) { - if (!_sampling_initialized) return; + if (!_sampling_pwr_initialized) return; ze_result_t result; *ts_us = 0; *energy_uj = 0; @@ -911,13 +943,14 @@ void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_ result = ZES_POWER_GET_ENERGY_COUNTER_PTR(_sampling_hPowers[deviceIdx][domainIdx], &energyCounter); if (result != ZE_RESULT_SUCCESS) { 
_ZE_ERROR_MSG("ZES_POWER_GET_ENERGY_COUNTER_PTR", result); - exit(-1); + return; } *ts_us = energyCounter.timestamp; *energy_uj = energyCounter.energy; } void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ + if (!_sampling_engines_initialized) return; ze_result_t result; for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { computeData[i].computeActive = 0; @@ -929,14 +962,14 @@ void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ result = ZES_ENGINE_GET_PROPERTIES_PTR(_sampling_engineHandles[deviceIdx][j], &engineProp); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", result); - exit(-1); + return; } if (engineProp.type == ZES_ENGINE_GROUP_COMPUTE_ALL){ zes_engine_stats_t engineStats = {0}; result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[deviceIdx][j], &engineStats); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); - exit(-1); + return; } computeData[engineProp.subdeviceId].computeActive = engineStats.activeTime; computeData[engineProp.subdeviceId].timestamp = engineStats.timestamp; @@ -945,6 +978,7 @@ void readComputeE(uint32_t deviceIdx, computeEngineData *computeData ){ } void readCopyE(uint32_t deviceIdx, copyEngineData *copyData ){ + if (!_sampling_engines_initialized) return; ze_result_t result; for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { copyData[i].copyActive = 0; @@ -956,14 +990,14 @@ void readCopyE(uint32_t deviceIdx, copyEngineData *copyData ){ result = ZES_ENGINE_GET_PROPERTIES_PTR(_sampling_engineHandles[deviceIdx][j], &engineProp); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_ENGINE_GET_PROPERTIES_PTR", result); - exit(-1); + return; } if (engineProp.type == ZES_ENGINE_GROUP_COPY_ALL){ zes_engine_stats_t engineStats = {0}; result = ZES_ENGINE_GET_ACTIVITY_PTR(_sampling_engineHandles[deviceIdx][j], &engineStats); if (result != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("ZES_ENGINE_GET_ACTIVITY_PTR", result); - 
exit(-1); + return; } copyData[engineProp.subdeviceId].copyActive = engineStats.activeTime; copyData[engineProp.subdeviceId].timestamp = engineStats.timestamp; From 8c3f0e073b2cb28c5ba59f744dd6d996260576c1 Mon Sep 17 00:00:00 2001 From: Solomon Bekele Date: Mon, 9 Oct 2023 09:02:26 -0500 Subject: [PATCH 10/10] Apply suggestions from code review Suggestions added Co-authored-by: Brice Videau --- ze/tracer_ze_helpers.include.c | 14 +++++++------- ze/ze_events.yaml | 1 - ze/zeinterval_callbacks.hpp | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 90d8f6ad..2ef2774f 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -793,12 +793,13 @@ void intializeFrequency() { _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device + _sampling_hFrequencies[i] = NULL; _sampling_freqDomainCounts[i] = 0; res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); _sampling_freqDomainCounts[i] = 0; - return; + continue; } _sampling_hFrequencies[i] = (zes_freq_handle_t*) malloc(_sampling_freqDomainCounts[i] * sizeof(zes_freq_handle_t)); res = ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], _sampling_hFrequencies[i]); @@ -806,7 +807,6 @@ void intializeFrequency() { _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_FREQUENCY_DOMAINS_PTR", res); _sampling_freqDomainCounts[i] = 0; free(_sampling_hFrequencies[i]); - return; } } _sampling_freq_initialized = 1; @@ -818,12 +818,13 @@ void intializePower() { _sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get power domains for 
each device + _sampling_hPowers[i] = NULL; _sampling_powerDomainCounts[i] = 0; res = ZES_DEVICE_ENUM_POWER_DOMAINS_PTR(_sampling_hDevices[i], &_sampling_powerDomainCounts[i], NULL); if (res != ZE_RESULT_SUCCESS) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); _sampling_powerDomainCounts[i] = 0; - return; + continue; } _sampling_hPowers[i] = (zes_pwr_handle_t*) malloc(_sampling_powerDomainCounts[i] * sizeof(zes_pwr_handle_t)); @@ -832,7 +833,6 @@ void intializePower() { _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_POWER_DOMAINS_PTR", res); _sampling_powerDomainCounts[i] = 0; free(_sampling_hPowers[i]); - return; } } _sampling_pwr_initialized = 1; @@ -844,12 +844,13 @@ void intializeEngines() { _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get engine counts for each device + _sampling_engineHandles[i] = NULL; _sampling_engineCounts[i] = 0; res = ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { _ZE_ERROR_MSG("1st ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); _sampling_engineCounts[i] = 0; - return; + continue; } _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); @@ -858,7 +859,6 @@ void intializeEngines() { _ZE_ERROR_MSG("2nd ZES_DEVICE_ENUM_ENGINE_GROUPS_PTR", res); _sampling_engineCounts[i] = 0; free(_sampling_engineHandles[i]); - return; } } _sampling_engines_initialized = 1; @@ -1015,7 +1015,7 @@ static void thapi_sampling_energy() { if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_frequency)){ for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { readFrequency(i, j, &frequency); - do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); + do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, 
(ze_device_handle_t)_sampling_hDevices[i], j, frequency); } } if (tracepoint_enabled(lttng_ust_ze_sampling, gpu_energy)){ diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index 646a2dbb..27c224e6 100644 --- a/ze/ze_events.yaml +++ b/ze/ze_events.yaml @@ -38,7 +38,6 @@ lttng_ust_ze_sampling: args: - [ ze_device_handle_t, hDevice ] - [ uint32_t, domain ] - - [ uint64_t, timestamp ] - [ uint64_t, frequency ] fields: - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index 3297cdd8..63fa541b 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -63,7 +63,7 @@ struct zeinterval_callbacks_state { std::unordered_map device_energy_ref; /*computeEngine */ std::unordered_map device_computeEngine_ref; - /*copyEngine */ + /*copyEngine */ std::unordered_map device_copyEngine_ref; };