diff --git a/utils/xprof_utils.cpp b/utils/xprof_utils.cpp index 5527e3eb..34e011b1 100644 --- a/utils/xprof_utils.cpp +++ b/utils/xprof_utils.cpp @@ -118,6 +118,103 @@ bt_message* create_frequency_message(const char* hostname, const process_id_t pr return message; } +bt_message* create_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field *device_id_field = bt_field_structure_borrow_member_field_by_index(payload_field,0); + bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + 
//subDevice + bt_field *subDevice_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(subDevice_field, subDevice); + + //activeTime + bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + + return message; +} + +bt_message* create_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { + + /* Message creation */ + bt_message *message = bt_message_event_create( + message_iterator, event_class, stream); + + /* event */ + bt_event *downstream_event = bt_message_event_borrow_event(message); + + /* Common context */ + bt_field *context_field = bt_event_borrow_common_context_field(downstream_event); + + // Hostname + bt_field *hostname_msg_field = bt_field_structure_borrow_member_field_by_index(context_field,0); + bt_field_string_set_value(hostname_msg_field, hostname); + // pid + bt_field *vpid_field = bt_field_structure_borrow_member_field_by_index(context_field,1); + bt_field_integer_signed_set_value(vpid_field, process_id); + // vid + bt_field *vtid_field = bt_field_structure_borrow_member_field_by_index(context_field,2); + bt_field_integer_signed_set_value(vtid_field, thread_id); + // ts + bt_field *ts_field = bt_field_structure_borrow_member_field_by_index(context_field,3); + bt_field_integer_signed_set_value(ts_field, ts); + // backend + bt_field *backend_field = bt_field_structure_borrow_member_field_by_index(context_field,4); + bt_field_integer_signed_set_value(backend_field, backend); + + /* Payload */ + bt_field *payload_field = bt_event_borrow_payload_field(downstream_event); + + // did + bt_field *device_id_field = 
bt_field_structure_borrow_member_field_by_index(payload_field,0); + bt_field_integer_unsigned_set_value(device_id_field, hDevice); + + // subDevice + bt_field *subDevice_field = bt_field_structure_borrow_member_field_by_index(payload_field,1); + bt_field_integer_unsigned_set_value(subDevice_field, subDevice); + + //activeTime + bt_field *activeTime_field = bt_field_structure_borrow_member_field_by_index(payload_field,2); + bt_field_integer_unsigned_set_value(activeTime_field, activeTime); + + return message; +} + + bt_message* create_host_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const char* name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend) { diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index 2695e4d5..cf13da4a 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -47,7 +47,7 @@ typedef std::string hostname_t; typedef std::string thapi_function_name; typedef uintptr_t thapi_device_id; typedef uint32_t thapi_domain_id; - +typedef uint32_t thapi_sdevice_id; // Represent a device and a sub device typedef std::tuple<thapi_device_id, thapi_device_id> dsd_t; typedef std::tuple<hostname_t, process_id_t> hp_t; @@ -60,6 +60,7 @@ typedef std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi typedef std::tuple<hostname_t, process_id_t, thapi_device_id> hp_device_t; typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_device_id> hp_dsd_t; typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_domain_id> hp_ddomain_t; +typedef std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_sdevice_id> hp_dsdev_t; typedef std::tuple<long, long> sd_t; typedef std::tuple<thread_id_t, thapi_function_name, long> tfn_ts_t; typedef std::tuple<thapi_function_name, long> fn_ts_t; @@ -116,11 +117,18 @@ bt_message* create_power_message(const char* 
hostname, const process_id_t propro const uintptr_t hDevice, const uint32_t domain, const uint64_t power, const uint64_t ts, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); - bt_message* create_frequency_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, const uintptr_t hDevice, const uint32_t domain, const uint64_t ts, const uint64_t frequency, bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); +bt_message* create_computeEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + +bt_message* create_copyEU_message(const char* hostname, const process_id_t proprocess_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts, + bt_event_class *event_class, bt_self_message_iterator *message_iterator, bt_stream *stream, backend_t backend = BACKEND_UNKNOWN); + bt_message *create_host_message(const char *hostname, const process_id_t, const thread_id_t, const char *name, const uint64_t ts, const uint64_t duration, const bool err, bt_event_class *, bt_self_message_iterator *, diff --git a/xprof/btx_interval_model.yaml b/xprof/btx_interval_model.yaml index f62aac1a..78785670 100644 --- a/xprof/btx_interval_model.yaml +++ b/xprof/btx_interval_model.yaml @@ -127,3 +127,41 @@ :type: integer_unsigned :field_value_range: 64 :cast_type: uint64_t + - :name: lttng:computeEU + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: 
subDevice + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: activeTime + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: lttng:copyEU + :payload_field_class: + :type: structure + :members: + - :name: did + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t + - :name: subDevice + :field_class: + :type: integer_unsigned + :field_value_range: 32 + :cast_type: uint32_t + - :name: activeTime + :field_class: + :type: integer_unsigned + :field_value_range: 64 + :cast_type: uint64_t diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 0c9f3d0d..4dc15661 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -30,7 +30,8 @@ struct timeline_dispatch_s { std::unordered_map<hp_device_t, perfetto_uuid_t> hp_device2countertracks; std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2frqtracks; std::unordered_map<hp_ddomain_t, perfetto_uuid_t> hp_ddomain2pwrtracks; - + std::unordered_map<hp_dsdev_t, perfetto_uuid_t> hp_dsdev2cpetracks; + std::unordered_map<hp_dsdev_t, perfetto_uuid_t> hp_dsdev2cpytracks; perfetto_pruned::Trace trace; }; using timeline_dispatch_t = struct timeline_dispatch_s; @@ -99,13 +100,24 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, } static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { - return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, "GPU Frequency", hostname, process_id, did, domain); + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, " GPU Frequency", hostname, process_id, did, domain); } static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did, thapi_device_id domain) { - return 
get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); + uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); +} + +static perfetto_uuid_t get_computeEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpetracks, "ComputeE Utilization", hostname, process_id, did, subDevice); +} + +static perfetto_uuid_t get_copyEU_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, thapi_device_id did, thapi_sdevice_id subDevice) { + return get_counter_track_uuuid(dispatch, dispatch->hp_dsdev2cpytracks, "CopyE Utilization", hostname, process_id, did, subDevice); } + static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, uint32_t domain, uint64_t timestamp, uint64_t frequency) { @@ -134,6 +146,34 @@ static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, track_event->set_name("Power"); track_event->set_counter_value(power); } +static void add_event_computeEU(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + perfetto_uuid_t track_uuid = get_computeEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); + track_event->set_track_uuid(track_uuid); + track_event->set_name("computeEngine Usage"); + 
track_event->set_counter_value(activeTime); +} + +static void add_event_copyEU(timeline_dispatch_t *dispatch, std::string hostname, + uint64_t process_id, uint64_t thread_id, uintptr_t did, + uint32_t subDevice, uint64_t timestamp, uint64_t activeTime) { + perfetto_uuid_t track_uuid = get_copyEU_track_uuuid(dispatch, hostname, process_id, did, subDevice); + auto *packet = dispatch->trace.add_packet(); + packet->set_trusted_packet_sequence_id(10000); + packet->set_timestamp(timestamp); + auto *track_event = packet->mutable_track_event(); + track_event->set_type(perfetto_pruned::TrackEvent::TYPE_COUNTER); + track_event->set_track_uuid(track_uuid); + track_event->set_name("copyEngine Usage"); + track_event->set_counter_value(activeTime); +} + static void add_event_begin(timeline_dispatch_t *dispatch, perfetto_uuid_t uuid, timestamp_t begin, std::string name) { @@ -352,11 +392,28 @@ static void power_usr_callback(void *btx_handle, void *usr_data, const char *hos add_event_power(dispatch, hostname, vpid, vtid, did, domain, ts, power); } +static void computeEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { + auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data); + add_event_computeEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); +} + +static void copyEU_usr_callback(void *btx_handle, void *usr_data, const char *hostname, + int64_t vpid, uint64_t vtid, int64_t ts, int64_t backend, + uint64_t did, uint32_t subDevice, uint64_t activeTime) { + auto *dispatch = static_cast<timeline_dispatch_t *>(usr_data); + add_event_copyEU(dispatch, hostname, vpid, vtid, did, subDevice, ts, activeTime); +} + + void btx_register_usr_callbacks(void *btx_handle) { btx_register_callbacks_lttng_host(btx_handle, &host_usr_callback); btx_register_callbacks_lttng_device(btx_handle, &device_usr_callback); 
btx_register_callbacks_lttng_frequency(btx_handle, &frequency_usr_callback); btx_register_callbacks_lttng_power(btx_handle, &power_usr_callback); + btx_register_callbacks_lttng_computeEU(btx_handle, &computeEU_usr_callback); + btx_register_callbacks_lttng_copyEU(btx_handle, &copyEU_usr_callback); btx_register_callbacks_initialize_usr_data(btx_handle, &btx_initialize_usr_data); btx_register_callbacks_finalize_usr_data(btx_handle, &btx_finalize_usr_data); } diff --git a/xprof/interval.c.erb b/xprof/interval.c.erb index 8e09ef4d..9a57d52e 100644 --- a/xprof/interval.c.erb +++ b/xprof/interval.c.erb @@ -99,7 +99,8 @@ bt_component_class_initialize_method_status <%= namespace %>_dispatch_initialize dispatch->device_name_event_class = create_lttng_device_name_event_class_message(trace_class, stream_class); dispatch->frequency_event_class = create_lttng_frequency_event_class_message(trace_class, stream_class); dispatch->power_event_class = create_lttng_power_event_class_message(trace_class, stream_class); - + dispatch->computeEU_event_class = create_lttng_computeEU_event_class_message(trace_class, stream_class); + dispatch->copyEU_event_class = create_lttng_copyEU_event_class_message(trace_class, stream_class); /* Create a default trace from (instance of `trace_class`) */ bt_trace *trace = bt_trace_create(trace_class); diff --git a/xprof/interval.h.erb b/xprof/interval.h.erb index 2ebdb184..3a0e1d91 100644 --- a/xprof/interval.h.erb +++ b/xprof/interval.h.erb @@ -58,6 +58,8 @@ struct <%= namespace %>_dispatch { bt_event_class *device_name_event_class; bt_event_class *frequency_event_class; bt_event_class *power_event_class; + bt_event_class *computeEU_event_class; + bt_event_class *copyEU_event_class; /* Component's input port (weak) */ bt_self_component_port_input *in_port; }; diff --git a/xprof/interval_model.yaml b/xprof/interval_model.yaml index 0a9df04a..af2e6e45 100644 --- a/xprof/interval_model.yaml +++ b/xprof/interval_model.yaml @@ -78,3 +78,27 @@ :field_value_range: 
32 - :name: power :class: unsigned +- :name: lttng:computeEU + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: subDevice + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: activeTime + :class: unsigned +- :name: lttng:copyEU + :payload: + - :name: did + :class: unsigned + :class_properties: + :preferred_display_base: 16 + - :name: subDevice + :class: unsigned + :class_properties: + :field_value_range: 32 + - :name: activeTime + :class: unsigned diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index 583299f3..db605617 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -762,9 +762,23 @@ static int _sampling_initialized = 0; static ze_device_handle_t* _sampling_hDevices; static zes_freq_handle_t** _sampling_hFrequencies; static zes_pwr_handle_t** _sampling_hPowers; +static zes_engine_handle_t** _sampling_engineHandles; static uint32_t _sampling_deviceCount; +static uint32_t _sampling_subDeviceCount; static uint32_t* _sampling_freqDomainCounts; static uint32_t* _sampling_powerDomainCounts; +static uint32_t* _sampling_engineCounts; +static uint32_t* _sampling_engineCounts; + +typedef struct { + uint64_t timestamp; + uint64_t computeActive; +} computeEngineData; + +typedef struct { + uint64_t timestamp; + uint64_t copyActive; +} copyEngineData; int initializeHandles() { ze_result_t res; @@ -811,6 +825,8 @@ int initializeHandles() { _ZE_ERROR_MSG("2nd ZE_DRIVER_GET_PTR", res); return -1; } + //Get no sub-devices + zeDeviceGetSubDevices(_sampling_hDevices[0], &_sampling_subDeviceCount, NULL); _sampling_hFrequencies = (zes_freq_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_freq_handle_t*)); _sampling_freqDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); @@ -818,6 +834,9 @@ int initializeHandles() { _sampling_hPowers = (zes_pwr_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_pwr_handle_t*)); 
_sampling_powerDomainCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + _sampling_engineHandles = (zes_engine_handle_t**) malloc(_sampling_deviceCount * sizeof(zes_engine_handle_t*)); + _sampling_engineCounts = (uint32_t*) malloc(_sampling_deviceCount * sizeof(uint32_t)); + for (uint32_t i = 0; i < _sampling_deviceCount; i++) { // Get frequency domains for each device res = zesDeviceEnumFrequencyDomains(_sampling_hDevices[i], &_sampling_freqDomainCounts[i], NULL); @@ -846,6 +865,19 @@ int initializeHandles() { printf("zesDeviceEnumPowerDomains failed for device %d: %d\n", i, res); return(-1); } + // Get the available engines for each device + res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], NULL); + if (res != ZE_RESULT_SUCCESS || _sampling_engineCounts[i] == 0) { + printf("No engine groups found\n"); + return(-1); + } + _sampling_engineHandles[i] = (zes_engine_handle_t*)malloc(_sampling_engineCounts[i] * sizeof(zes_engine_handle_t)); + res = zesDeviceEnumEngineGroups(_sampling_hDevices[i], &_sampling_engineCounts[i], _sampling_engineHandles[i]); + if (res != ZE_RESULT_SUCCESS) { + printf("Failed to get engine group handles\n"); + free(_sampling_engineHandles[i]); /* release only this device's handle array; the outer table is shared by every device */ + return (-1); + } } free(hDriver); _sampling_initialized=1; @@ -874,10 +906,50 @@ void readEnergy(uint32_t deviceIdx, uint32_t domainIdx, uint64_t *ts_us, uint64_ } } +void readPerformance(uint32_t deviceIdx, computeEngineData *computeData, copyEngineData *copyData ){ + ze_result_t result; + for (uint32_t i = 0; i < _sampling_subDeviceCount; i++) { + computeData[i].computeActive = 0; + computeData[i].timestamp = 0; + copyData[i].copyActive = 0; + copyData[i].timestamp = 0; + } + for (uint32_t j = 0; j < _sampling_engineCounts[deviceIdx]; ++j) { + zes_engine_properties_t engineProp = {}; + result = zesEngineGetProperties(_sampling_engineHandles[deviceIdx][j], &engineProp); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine 
properties\n"); + exit(-1); + } + if (engineProp.type == ZES_ENGINE_GROUP_COMPUTE_ALL){ + zes_engine_stats_t engineStats = {0}; + result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine activity data\n"); + exit(-1); + } + computeData[engineProp.subdeviceId].computeActive = engineStats.activeTime; + computeData[engineProp.subdeviceId].timestamp = engineStats.timestamp; + } + if (engineProp.type == ZES_ENGINE_GROUP_COPY_ALL){ + zes_engine_stats_t engineStats = {0}; + result = zesEngineGetActivity(_sampling_engineHandles[deviceIdx][j], &engineStats); + if (result != ZE_RESULT_SUCCESS) { + printf("Failed to get engine activity data\n"); + exit(-1); + } + copyData[engineProp.subdeviceId].copyActive = engineStats.activeTime; + copyData[engineProp.subdeviceId].timestamp = engineStats.timestamp; + } + } +} + static void thapi_sampling_energy() { uint64_t ts_us; uint64_t energy_uj; uint32_t frequency; + computeEngineData computeE[_sampling_subDeviceCount]; + copyEngineData copyE[_sampling_subDeviceCount]; for (uint32_t i = 0; i < _sampling_deviceCount; i++) { for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { readFrequency(i, j, &frequency); @@ -887,6 +959,11 @@ static void thapi_sampling_energy() { readEnergy(i, j, &ts_us, &energy_uj); do_tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); } + readPerformance(i, computeE, copyE); + for (uint32_t k=0; k<_sampling_subDeviceCount; k++){ + do_tracepoint(lttng_ust_ze_sampling, computeEngine , (ze_device_handle_t)_sampling_hDevices[i], k, computeE[k].computeActive, computeE[k].timestamp); + do_tracepoint(lttng_ust_ze_sampling, copyEngine, (ze_device_handle_t)_sampling_hDevices[i], k, copyE[k].copyActive, copyE[k].timestamp); + } } } diff --git a/ze/ze_events.yaml b/ze/ze_events.yaml index 4d914186..646a2dbb 100644 --- a/ze/ze_events.yaml +++ 
b/ze/ze_events.yaml @@ -1,10 +1,32 @@ --- lttng_ust_ze_sampling: events: + - name: copyEngine + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, subDevice ] + - [ uint64_t, activeTime ] + - [ uint64_t, timestamp ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, subDevice, "subDevice" ] + - [ ctf_integer, uint64_t, activeTime, "activeTime" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] + - name: computeEngine + args: + - [ ze_device_handle_t, hDevice ] + - [ uint32_t, subDevice ] + - [ uint64_t, activeTime ] + - [ uint64_t, timestamp ] + fields: + - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] + - [ ctf_integer, uint32_t, subDevice, "subDevice" ] + - [ ctf_integer, uint64_t, activeTime, "activeTime" ] + - [ ctf_integer, uint64_t, timestamp, "timestamp" ] - name: gpu_energy args: - [ ze_device_handle_t, hDevice ] - - [ uint32_t, domain] + - [ uint32_t, domain ] - [ uint64_t, energy ] - [ uint64_t, timestamp ] fields: @@ -15,8 +37,8 @@ lttng_ust_ze_sampling: - name: gpu_frequency args: - [ ze_device_handle_t, hDevice ] - - [ uint32_t, domain] - - [ uint64_t, timestamp ] + - [ uint32_t, domain ] + - [ uint64_t, timestamp ] - [ uint64_t, frequency ] fields: - [ ctf_integer_hex, uintptr_t, hDevice, "(uintptr_t)hDevice" ] diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index e3199e1e..86ba6b9f 100644 --- a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -124,6 +124,52 @@ static void create_and_enqueue_host_message(const char* hostname, const process_ state->downstream_message_queue.push(message); } +static void create_and_enqueue_computeEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) 
zeinterval_iter_g->callbacks_state; + auto [it, inserted] = state->device_computeEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); + // First entry + if (inserted) + return; + + auto &[prev_activeTime, prev_ts] = it->second; + + bt_message *message = create_computeEU_message(hostname, process_id, + thread_id, hDevice, subDevice, + static_cast<uint64_t>(((activeTime - prev_activeTime) / static_cast<double>(ts-prev_ts))*100000.0), + prev_ts, + zeinterval_iter_g->dispatch->computeEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + state->downstream_message_queue.push(message); + prev_activeTime = activeTime; + prev_ts = ts; +} + +static void create_and_enqueue_copyEU_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, + const uintptr_t hDevice, const uint32_t subDevice, const uint64_t activeTime, const uint64_t ts) { + zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; + auto [it, inserted] = state->device_copyEngine_ref.insert({{hostname, process_id, hDevice, subDevice}, {activeTime, ts}}); + // First entry + if (inserted) + return; + + auto &[prev_activeTime, prev_ts] = it->second; + + bt_message *message = create_copyEU_message(hostname, process_id, + thread_id, hDevice, subDevice, + static_cast<uint64_t>(((activeTime-prev_activeTime) / static_cast<double>(ts-prev_ts))*100000.0), + prev_ts, + zeinterval_iter_g->dispatch->copyEU_event_class, + zeinterval_self_message_iterator_g, + zeinterval_iter_g->dispatch->stream, BACKEND_ZE); + state->downstream_message_queue.push(message); + prev_activeTime = activeTime; + prev_ts = ts; +} + + + static void create_and_enqueue_device_message( const char* hostname, const process_id_t process_id, const thread_id_t thread_id, thapi_device_id device, const char* commandname, const char* metadata, @@ -293,6 +339,20 @@ static void zeinterval_<%= dbt_event.name 
%>_callback( int64_t ns_from_origin; bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); create_and_enqueue_frequency_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, domain, ns_from_origin, frequency); + <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:computeEngine" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_computeEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); + <% elsif dbt_event.name_unsanitized == "lttng_ust_ze_sampling:copyEngine" %> + const hostname_t hostname = borrow_hostname(bt_evt); + const process_id_t process_id = 0; + const thread_id_t thread_id = 0; + int64_t ns_from_origin; + bt_clock_snapshot_get_ns_from_origin(bt_clock, &ns_from_origin); + create_and_enqueue_copyEU_message(hostname.c_str(), process_id, thread_id, (uintptr_t)hDevice, subDevice, activeTime, ns_from_origin); <% elsif dbt_event.name_unsanitized.start_with?('lttng_ust_ze:') or dbt_event.name_unsanitized.start_with?('lttng_ust_zet:') or dbt_event.name_unsanitized.start_with?('lttng_ust_zes:') or diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index d48d0bef..466f2f08 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -18,10 +18,13 @@ typedef std::tuple<hostname_t, process_id_t, ze_command_queue_handle_t> hp_comma typedef std::tuple<hostname_t, process_id_t, ze_module_handle_t> hp_module_t; typedef hp_device_t hpd_t; typedef hp_dsd_t hpdd_t; +typedef hp_dsdev_t hpdsd_t; typedef hp_event_t hpe_t; typedef hp_kernel_t hpk_t; typedef std::tuple<uint64_t, uint64_t> clock_lttng_device_t; typedef std::tuple<uint64_t, uint64_t> energy_timestamp_t; +typedef std::tuple<uint64_t, uint64_t> computeEngine_timestamp_t; +typedef 
std::tuple<uint64_t, uint64_t> copyEngine_timestamp_t; typedef std::tuple<thread_id_t, thapi_function_name, std::string, thapi_device_id, uint64_t, clock_lttng_device_t> t_tfnm_m_d_ts_cld_t; typedef std::tuple<ze_command_list_handle_t, thapi_function_name, std::string, thapi_device_id, uint64_t> l_tfnm_m_d_ts_t; @@ -58,6 +61,11 @@ struct zeinterval_callbacks_state { std::unordered_map<hpt_t, std::vector<std::byte>> last_command; /*Energy */ std::unordered_map<hpdd_t, energy_timestamp_t> device_energy_ref; + /*computeEngine */ + std::unordered_map<hpdsd_t, computeEngine_timestamp_t> device_computeEngine_ref; + /*copyEngine */ + std::unordered_map<hpdsd_t, copyEngine_timestamp_t> device_copyEngine_ref; + }; template <class K,