From e604c708e9abfc20b5c3011600a54fc3605e9870 Mon Sep 17 00:00:00 2001 From: Solomon Bekele <131887626+sbekele81@users.noreply.github.com> Date: Fri, 15 Sep 2023 16:35:08 -0500 Subject: [PATCH] Domain Timeline (#151) * Domain Timeline * Apply suggestions from code review Changes to domain timeline. Co-authored-by: Brice Videau * Changes Applied * Update utils/xprof_utils.hpp --------- Co-authored-by: sbekele Co-authored-by: Brice Videau --- utils/xprof_utils.hpp | 2 ++ xprof/btx_timeline.cpp | 46 +++++++++++++++------------------ ze/tracer_ze_helpers.include.c | 5 ++-- ze/zeinterval_callbacks.cpp.erb | 2 +- ze/zeinterval_callbacks.hpp | 3 ++- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/utils/xprof_utils.hpp b/utils/xprof_utils.hpp index 7136a95d..2695e4d5 100644 --- a/utils/xprof_utils.hpp +++ b/utils/xprof_utils.hpp @@ -46,6 +46,7 @@ typedef uintptr_t thread_id_t; typedef std::string hostname_t; typedef std::string thapi_function_name; typedef uintptr_t thapi_device_id; +typedef uint32_t thapi_domain_id; // Represent a device and a sub device typedef std::tuple dsd_t; @@ -58,6 +59,7 @@ typedef std::tuple hp_device_t; typedef std::tuple hp_dsd_t; +typedef std::tuple hp_ddomain_t; typedef std::tuple sd_t; typedef std::tuple tfn_ts_t; typedef std::tuple fn_ts_t; diff --git a/xprof/btx_timeline.cpp b/xprof/btx_timeline.cpp index 98d060ba..0c9f3d0d 100644 --- a/xprof/btx_timeline.cpp +++ b/xprof/btx_timeline.cpp @@ -27,10 +27,9 @@ struct timeline_dispatch_s { std::map> track2lasts; - std::unordered_map hp2frqtracks; - std::unordered_map hp2pwrtracks; - std::unordered_map hp_devs2frqtracks; - std::unordered_map hp_devs2pwrtracks; + std::unordered_map hp_device2countertracks; + std::unordered_map hp_ddomain2frqtracks; + std::unordered_map hp_ddomain2pwrtracks; perfetto_pruned::Trace trace; }; @@ -42,10 +41,10 @@ static perfetto_uuid_t gen_perfetto_uuid() { return uuid++; } -static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispatch, std::unordered_map &parent_tracks, - const std::string track_name, std::string hostname, uint64_t process_id) { +static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispatch, + std::string hostname, uint64_t process_id, thapi_device_id did ) { perfetto_uuid_t hp_uuid = 0; - auto [it, inserted] = parent_tracks.insert({{hostname, process_id}, hp_uuid}); + auto [it, inserted] = dispatch->hp_device2countertracks.insert({{hostname, process_id, did}, hp_uuid}); auto &potential_uuid = it->second; // Exists if (!inserted) @@ -65,23 +64,23 @@ static perfetto_uuid_t get_parent_counter_track_uuid(timeline_dispatch_t *dispat auto *process = track_descriptor->mutable_process(); process->set_pid(hp_uuid); std::ostringstream oss; - oss << "Hostname " << hostname << " | Process " << process_id; - oss << " | " << track_name << " | uuid "; + oss << "Hostname " << hostname << " | Process " << process_id << " | Device " << did ; + //oss << " | " << track_name << " | uuid "; process->set_process_name(oss.str()); return hp_uuid; } -static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, std::unordered_map &parent_tracks, - std::unordered_map &counter_tracks, const std::string track_name, - std::string hostname, uint64_t process_id, thapi_device_id did) { +static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, + std::unordered_map &counter_tracks, const std::string track_name, + std::string hostname, uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { perfetto_uuid_t hp_dev_uuid = 0; - auto [it, inserted] = counter_tracks.insert({{hostname, process_id, did}, hp_dev_uuid}); + auto [it, inserted] = counter_tracks.insert({{hostname, process_id, did, domain}, hp_dev_uuid}); auto &potential_uuid = it->second; // Exists if (!inserted) return potential_uuid; - perfetto_uuid_t hp_uuid = get_parent_counter_track_uuid(dispatch, parent_tracks, track_name, hostname, process_id); + perfetto_uuid_t hp_uuid = get_parent_counter_track_uuid(dispatch, hostname, process_id, did); hp_dev_uuid = gen_perfetto_uuid(); potential_uuid = hp_dev_uuid; @@ -93,27 +92,25 @@ static perfetto_uuid_t get_counter_track_uuuid(timeline_dispatch_t *dispatch, st track_descriptor->set_uuid(hp_dev_uuid); track_descriptor->set_parent_uuid(hp_uuid); std::ostringstream oss; - oss << "Device " << did; + oss << track_name << " | Domain " << domain; track_descriptor->set_name(oss.str()); track_descriptor->mutable_counter(); return hp_dev_uuid; } - static perfetto_uuid_t get_frequency_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did) { - return get_counter_track_uuuid(dispatch, dispatch->hp2frqtracks, dispatch->hp_devs2frqtracks, "GPU Frequency", hostname, process_id, did); + uint64_t process_id, thapi_device_id did, thapi_domain_id domain) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2frqtracks, "GPU Frequency", hostname, process_id, did, domain); } - static perfetto_uuid_t get_power_track_uuuid(timeline_dispatch_t *dispatch, std::string hostname, - uint64_t process_id, thapi_device_id did) { - return get_counter_track_uuuid(dispatch, dispatch->hp2pwrtracks, dispatch->hp_devs2pwrtracks, "GPU Power", hostname, process_id, did); + uint64_t process_id, thapi_device_id did, thapi_device_id domain) { + return get_counter_track_uuuid(dispatch, dispatch->hp_ddomain2pwrtracks, " GPU Power", hostname, process_id, did, domain); } static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, uint32_t domain, uint64_t timestamp, uint64_t frequency) { - (void)domain; - perfetto_uuid_t track_uuid = get_frequency_track_uuuid(dispatch, hostname, process_id, did); + + perfetto_uuid_t track_uuid = get_frequency_track_uuuid(dispatch, hostname, process_id, did, domain); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); packet->set_timestamp(timestamp); @@ -127,8 +124,7 @@ static void add_event_frequency(timeline_dispatch_t *dispatch, std::string hostn static void add_event_power(timeline_dispatch_t *dispatch, std::string hostname, uint64_t process_id, uint64_t thread_id, uintptr_t did, uint32_t domain, uint64_t timestamp, uint64_t power) { - (void)domain; - perfetto_uuid_t track_uuid = get_power_track_uuuid(dispatch, hostname, process_id, did); + perfetto_uuid_t track_uuid = get_power_track_uuuid(dispatch, hostname, process_id, did, domain); auto *packet = dispatch->trace.add_packet(); packet->set_trusted_packet_sequence_id(10000); packet->set_timestamp(timestamp); diff --git a/ze/tracer_ze_helpers.include.c b/ze/tracer_ze_helpers.include.c index fee27ecf..583299f3 100644 --- a/ze/tracer_ze_helpers.include.c +++ b/ze/tracer_ze_helpers.include.c @@ -879,11 +879,11 @@ static void thapi_sampling_energy() { uint64_t energy_uj; uint32_t frequency; for (uint32_t i = 0; i < _sampling_deviceCount; i++) { - for (uint32_t j = 0; j < (_sampling_freqDomainCounts[i] >= 1 ? 1:0); j++) { + for (uint32_t j = 0; j < _sampling_freqDomainCounts[i]; j++) { readFrequency(i, j, &frequency); do_tracepoint(lttng_ust_ze_sampling, gpu_frequency, (ze_device_handle_t)_sampling_hDevices[i], j, ts_us, frequency); } - for (uint32_t j = 0; j < (_sampling_powerDomainCounts[i] >= 1 ? 1:0); j++) { + for (uint32_t j = 0; j < _sampling_powerDomainCounts[i]; j++) { readEnergy(i, j, &ts_us, &energy_uj); do_tracepoint(lttng_ust_ze_sampling, gpu_energy, (ze_device_handle_t)_sampling_hDevices[i], j, (uint64_t)energy_uj, ts_us); } @@ -946,6 +946,7 @@ static void _load_tracer(void) { /* TODO: make it configurable */ interval.tv_sec = 0; interval.tv_nsec = 50000000; + thapi_sampling_energy(); thapi_register_sampling(&thapi_sampling_energy, &interval); } diff --git a/ze/zeinterval_callbacks.cpp.erb b/ze/zeinterval_callbacks.cpp.erb index 8ca790bd..e3199e1e 100644 --- a/ze/zeinterval_callbacks.cpp.erb +++ b/ze/zeinterval_callbacks.cpp.erb @@ -81,7 +81,7 @@ void *init_zeinterval_callbacks_state() { static void create_and_enqueue_power_message(const char* hostname, const process_id_t process_id, const thread_id_t thread_id, const uintptr_t hDevice, const uint32_t domain, const uint64_t energy, const uint64_t ts) { zeinterval_callbacks_state* state = (zeinterval_callbacks_state*) zeinterval_iter_g->callbacks_state; - auto [it, inserted] = state->device_energy_ref.insert({{hostname, process_id, hDevice}, {energy, ts}}); + auto [it, inserted] = state->device_energy_ref.insert({{hostname, process_id, hDevice, domain}, {energy, ts}}); // First entry if (inserted) return; diff --git a/ze/zeinterval_callbacks.hpp b/ze/zeinterval_callbacks.hpp index cbc0027e..d48d0bef 100644 --- a/ze/zeinterval_callbacks.hpp +++ b/ze/zeinterval_callbacks.hpp @@ -17,6 +17,7 @@ typedef std::tuple hp_comman typedef std::tuple hp_command_queue_t; typedef std::tuple hp_module_t; typedef hp_device_t hpd_t; +typedef hp_dsd_t hpdd_t; typedef hp_event_t hpe_t; typedef hp_kernel_t hpk_t; typedef std::tuple clock_lttng_device_t; @@ -56,7 +57,7 @@ struct zeinterval_callbacks_state { /* Stack to get begin end */ std::unordered_map> last_command; /*Energy */ - std::unordered_map device_energy_ref; + std::unordered_map device_energy_ref; }; template