// Byte index of each topdown (TMA) metric inside the packed 64-bit value
// obtained for the "perf::topdown-retiring" counter. Offsets 0-3 are the
// four top-level categories; offsets 4-7 are the second-level categories
// used by the per-category breakdowns.
constexpr uint64_t RETIRING_OFFSET = 0;
constexpr uint64_t BAD_SPEC_OFFSET = 1;
constexpr uint64_t FE_BOUND_OFFSET = 2;
constexpr uint64_t BE_BOUND_OFFSET = 3;

constexpr uint64_t HEAVY_OPS_OFFSET = 4;
constexpr uint64_t BR_MISPRED_OFFSET = 5;
constexpr uint64_t FETCH_LAT_OFFSET = 6;
constexpr uint64_t MEM_BOUND_OFFSET = 7;

/// @brief Extract one 8-bit topdown metric from a packed 64-bit value.
///
/// @param rdpmc_value Packed value holding eight one-byte metrics.
/// @param offset      Byte index of the metric (0 = least significant byte);
///                    normally one of the *_OFFSET constants above.
/// @return The selected byte scaled to a fraction in [0.0, 1.0] (byte / 0xff).
///         Despite the "percent" in the name, the result is NOT multiplied
///         by 100. Returns 0.0 when @p offset does not address a byte of the
///         64-bit value.
static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
                                               uint64_t offset) {
  // Shifting a 64-bit value by 64 or more bits is undefined behaviour, so
  // reject byte indices that fall outside the packed value.
  if (offset >= sizeof(rdpmc_value))
    return 0.0;

  const uint64_t raw_byte = (rdpmc_value >> (offset * 8)) & 0xff;
  return static_cast<double>(raw_byte) / 0xff;
}
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty(); + bool is_incomplete = + v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); // Check if all Variants are greater than 0 when casted to doubles (use // .to_double()) - bool is_nonzero = - v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && - v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 && - v_int_misc_uop_dropping.to_double() > 0.0 && - v_slots_or_info_thread_slots.to_double() > 0.0; + bool is_nonzero = v_tma_metrics.to_uint() > 0; // Check if bad values were obtained if (is_incomplete || !is_nonzero) return ret; - // Perform toplevel calcs - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + + double retiring = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET); + double frontend_bound = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); + double backend_bound = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); double bad_speculation = - std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); // Add 
toplevel metrics to vector of Entry ret.reserve(4); @@ -106,30 +95,22 @@ SapphireRapidsTopdown::compute_retiring(const std::vector &rec) { // Get PAPI metrics for toplevel calculations Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_slots_or_info_thread_slots.empty() || - v_heavy_ops.empty(); + bool is_incomplete = + v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); // Check if bad values were obtained if (is_incomplete) return ret; - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + double retiring = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET); + double heavy_ops = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET); double light_ops = std::max(0.0, retiring - heavy_ops); // Add toplevel metrics to vector of Entry @@ -152,30 +133,22 @@ SapphireRapidsTopdown::compute_backend_bound(const std::vector &rec) { // Get PAPI metrics for toplevel calculations Variant v_slots_or_info_thread_slots = 
get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_slots_or_info_thread_slots.empty() || - v_memory_bound.empty(); + bool is_incomplete = + v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); // Check if bad values were obtained if (is_incomplete) return ret; - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + double backend_bound = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); + double memory_bound = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET); double core_bound = std::max(0.0, backend_bound - memory_bound); // Add toplevel metrics to vector of Entry @@ -198,35 +171,22 @@ SapphireRapidsTopdown::compute_frontend_bound(const std::vector &rec) { // Get PAPI metrics for toplevel calculations Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant 
v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_int_misc_uop_dropping = - get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); - Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); // Check if any Variant is empty (use .empty()) bool is_incomplete = - v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || - v_retiring.empty() || v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty(); + v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); // Check if bad values were obtained if (is_incomplete) return ret; - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); - - double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + double frontend_bound = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); + double fetch_latency = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET); double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency); // Add toplevel metrics to vector of Entry @@ -249,40 +209,22 @@ SapphireRapidsTopdown::compute_bad_speculation(const std::vector &rec) { // Get PAPI metrics for toplevel calculations Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = 
get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_int_misc_uop_dropping = - get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); - Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); // Check if any Variant is empty (use .empty()) bool is_incomplete = - v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || - v_retiring.empty() || v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty(); + v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); // Check if bad values were obtained if (is_incomplete) return ret; - // Perform toplevel calcs - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double bad_speculation = - std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + double bad_speculation = + get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); + double branch_mispredict = get_tma_percent_from_rdpmc_value( + tma_metric_papi_rdpmc, BR_MISPRED_OFFSET); double machine_clears = std::max(0.0, bad_speculation - branch_mispredict); // Add toplevel metrics to vector of Entry