Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactors topdown service to support multiple types of calculations and adds support for Sapphire Rapids #576

Merged
merged 11 commits into from
Oct 25, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Reworks SPR topdown implementation to use rdpmc-style values instead of raw counter values
  • Loading branch information
ilumsden committed Oct 25, 2024
commit 0d98d08ad3fabe73c67fac49cd17add263c91e90
178 changes: 60 additions & 118 deletions src/services/topdown/SapphireRapidsTopdown.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

#include <algorithm>

// Field indices into the packed 64-bit value that is passed to
// get_tma_percent_from_rdpmc_value(). That helper shifts by (index * 8) and
// masks with 0xff, so index k selects bits [8*k, 8*k + 7].
//
// Indices 0-3: the four categories read by compute_toplevel().
#define RETIRING_OFFSET 0
#define BAD_SPEC_OFFSET 1
#define FE_BOUND_OFFSET 2
#define BE_BOUND_OFFSET 3

// Indices 4-7: the sub-category fields read by compute_retiring(),
// compute_bad_speculation(), compute_frontend_bound() and
// compute_backend_bound() respectively.
#define HEAVY_OPS_OFFSET 4
#define BR_MISPRED_OFFSET 5
#define FETCH_LAT_OFFSET 6
#define MEM_BOUND_OFFSET 7

/// @brief Extracts one 8-bit field from a packed 64-bit value and scales it
///        to a fraction.
///
/// Despite "percent" in the name, the result is a fraction in [0.0, 1.0]
/// (field / 255), not a value in [0, 100].
///
/// NOTE(review): the packed value presumably follows the layout of the
/// topdown metrics word delivered via rdpmc -- confirm against the PAPI
/// component that produces it.
///
/// @param rdpmc_value Packed value holding eight consecutive 8-bit fields.
/// @param offset      Index of the field to extract; field k occupies bits
///                    [8*k, 8*k + 7]. Valid indices are 0 through 7.
/// @return The selected field divided by 255, or 0.0 if @p offset does not
///         name a field inside the 64-bit value.
static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
                                               uint64_t offset) {
  // Shifting a 64-bit value by 64 or more bits is undefined behaviour, so
  // reject indices that fall outside the word instead of shifting blindly.
  // Checking the index (not index * 8) also avoids overflow in the product.
  if (offset >= sizeof(rdpmc_value)) {
    return 0.0;
  }

  const uint64_t field = (rdpmc_value >> (offset * 8)) & 0xff;
  return static_cast<double>(field) / 0xff;
}

namespace cali {
namespace topdown {

Expand All @@ -10,22 +25,10 @@ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
level,
// top_counters
"perf::slots"
",perf::topdown-retiring"
",perf::topdown-bad-spec"
",perf::topdown-fe-bound"
",perf::topdown-be-bound"
",INT_MISC:UOP_DROPPING",
",perf::topdown-retiring",
// all_counters
"perf::slots"
",perf::topdown-retiring"
",perf::topdown-bad-spec"
",perf::topdown-fe-bound"
",perf::topdown-be-bound"
",INT_MISC:UOP_DROPPING"
",perf_raw::r8400" // topdown-heavy-ops
",perf_raw::r8500" // topdown-br-mispredict
",perf_raw::r8600" // topdown-fetch-lat
",perf_raw::r8700", // topdown-mem-bound
",perf::topdown-retiring",
// res_top
{"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
// res_all
Expand All @@ -44,43 +47,29 @@ SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
// Check if all Variants are greater than 0 when casted to doubles (use
// .to_double())
bool is_nonzero =
v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
v_int_misc_uop_dropping.to_double() > 0.0 &&
v_slots_or_info_thread_slots.to_double() > 0.0;
bool is_nonzero = v_tma_metrics.to_uint() > 0;

// Check if bad values were obtained
if (is_incomplete || !is_nonzero)
return ret;

// Perform toplevel calcs
double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());

double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double retiring =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
double frontend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
double backend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
double bad_speculation =
std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);

// Add toplevel metrics to vector of Entry
ret.reserve(4);
Expand All @@ -106,30 +95,22 @@ SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_slots_or_info_thread_slots.empty() ||
v_heavy_ops.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double retiring =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
double heavy_ops =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
double light_ops = std::max(0.0, retiring - heavy_ops);

// Add toplevel metrics to vector of Entry
Expand All @@ -152,30 +133,22 @@ SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_slots_or_info_thread_slots.empty() ||
v_memory_bound.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double backend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
double memory_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
double core_bound = std::max(0.0, backend_bound - memory_bound);

// Add toplevel metrics to vector of Entry
Expand All @@ -198,35 +171,22 @@ SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete =
v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());

double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double frontend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
double fetch_latency =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);

// Add toplevel metrics to vector of Entry
Expand All @@ -249,40 +209,22 @@ SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete =
v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

// Perform toplevel calcs
double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());

double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double bad_speculation =
std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double bad_speculation =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
double branch_mispredict = get_tma_percent_from_rdpmc_value(
tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);

// Add toplevel metrics to vector of Entry
Expand Down