From 01bf472599393443e2ad3d667e173ab66999fce5 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Sat, 20 Jul 2024 17:16:42 -0700
Subject: [PATCH 01/11] Reimplements the IntelTopdown service to support both
 Haswell/Broadwell calculations and Sapphire Rapids/Emerald Rapids
 calculations

---
 src/services/topdown/IntelTopdown.cpp | 649 ++++++++++++++++++++------
 1 file changed, 503 insertions(+), 146 deletions(-)
diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp
index 80064bb2..1724f561 100644
--- a/src/services/topdown/IntelTopdown.cpp
+++ b/src/services/topdown/IntelTopdown.cpp
@@ -26,28 +26,24 @@ using namespace cali;
 namespace
 {
 
-class IntelTopdown
+enum IntelTopdownLevel { All = 1, Top = 2 };
+
+class TopdownCalculator
 {
-    static const char* s_top_counters;
-    static const char* s_all_counters;
+protected:
 
-    std::map<std::string, Attribute> counter_attrs;
-    std::map<std::string, Attribute> result_attrs;
+    IntelTopdownLevel m_level;
 
-    std::map<std::string, int> counters_not_found;
+    const char* m_top_counters;
+    const char* m_all_counters;
 
-    unsigned num_top_computed;
-    unsigned num_top_skipped;
-    unsigned num_be_computed;
-    unsigned num_be_skipped;
-    unsigned num_fe_computed;
-    unsigned num_fe_skipped;
-    unsigned num_bsp_computed;
-    unsigned num_bsp_skipped;
+    const char* s_res_top[];
+    const char* s_res_all[];
 
-    enum Level { All = 1, Top = 2 };
+    std::map<std::string, Attribute> m_counter_attrs;
+    std::map<std::string, Attribute> m_result_attrs;
 
-    Level level;
+    std::map<std::string, int> m_counters_not_found;
 
     Variant get_val_from_rec(const std::vector<Entry>& rec, const char* name)
     {
@@ -64,14 +60,38 @@ class IntelTopdown
         if (it != rec.end())
             ret = it->value();
         else
-            ++counters_not_found[std::string(name)];
+            ++m_counters_not_found[std::string(name)];
 
         return ret;
     }
 
+public:
+
+    TopdownCalculator(IntelTopdownLevel level) : m_level(level) {}
+
+    // Calculators are owned and deleted through a TopdownCalculator*, so the destructor must be virtual.
+    virtual ~TopdownCalculator() = default;
+
+    // Each compute_*() returns the result entries for one topdown category; callers treat a result whose
+    // size differs from the matching get_num_expected_*() as "record skipped".
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) = 0;
+    virtual std::size_t get_num_expected_toplevel() const = 0;
+
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) = 0;
+    virtual std::size_t get_num_expected_retiring() const = 0;
+
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) = 0;
+    virtual std::size_t get_num_expected_backend_bound() const = 0;
+
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) = 0;
+    virtual std::size_t get_num_expected_frontend_bound() const = 0;
+
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) = 0;
+    virtual std::size_t get_num_expected_bad_speculation() const = 0;
+
     bool find_counter_attrs(CaliperMetadataAccessInterface& db)
     {
-        const char* list     = (level == All ? s_all_counters : s_top_counters);
+        const char* list     = (m_level == All ? m_all_counters : m_top_counters);
         auto        counters = StringConverter(list).to_stringlist();
 
         for (const auto& s : counters) {
@@ -84,7 +104,7 @@ class IntelTopdown
                 return false;
             }
 
-            counter_attrs[s] = attr;
+            m_counter_attrs[s] = attr;
         }
 
         return true;
@@ -92,24 +112,80 @@ class IntelTopdown
 
     void make_result_attrs(CaliperMetadataAccessInterface& db)
     {
-        const char* res_top[] = { "retiring", "backend_bound", "frontend_bound", "bad_speculation", nullptr };
-        const char* res_all[] = { "retiring",         "backend_bound",      "frontend_bound",
-                                  "bad_speculation",  "branch_mispredict",  "machine_clears",
-                                  "frontend_latency", "frontend_bandwidth", "memory_bound",
-                                  "core_bound",       "ext_mem_bound",      "l1_bound",
-                                  "l2_bound",         "l3_bound",           nullptr };
-
-        const char** res = (level == Top ? res_top : res_all);
+        const char** res = (m_level == Top ? m_res_top : m_res_all);
 
         for (const char** s = res; s && *s; ++s)
-            result_attrs[std::string(*s)] = db.create_attribute(
+            m_result_attrs[std::string(*s)] = db.create_attribute(
                 std::string("topdown.") + (*s),
                 CALI_TYPE_DOUBLE,
                 CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS
             );
     }
 
-    std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec)
+    const std::map<std::string, int>& get_counters_not_found() const { return m_counters_not_found; }
+
+    const char* get_counters() const
+    {
+        if (m_level == All) {
+            return m_all_counters;
+        } else {
+            return m_top_counters;
+        }
+    }
+
+    IntelTopdownLevel get_level() const { return m_level; }
+};
+
+class HaswellTopdown
+{
+public:
+
+    HaswellTopdown(IntelTopdownLevel level)
+        : TopdownCalculator(level),
+          m_top_counters(
+              "CPU_CLK_THREAD_UNHALTED:THREAD_P"
+              ",IDQ_UOPS_NOT_DELIVERED:CORE"
+              ",INT_MISC:RECOVERY_CYCLES"
+              ",UOPS_ISSUED:ANY"
+              ",UOPS_RETIRED:RETIRE_SLOTS"
+          ),
+          m_all_counters(
+              "BR_MISP_RETIRED:ALL_BRANCHES"
+              ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
+              ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
+              ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
+              ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
+              ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
+              ",IDQ_UOPS_NOT_DELIVERED:CORE"
+              ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
+              ",INT_MISC:RECOVERY_CYCLES"
+              ",MACHINE_CLEARS:COUNT"
+              ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
+              ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
+              ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
+              ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
+              ",UOPS_ISSUED:ANY"
+              ",UOPS_RETIRED:RETIRE_SLOTS"
+          ),
+          m_res_top({ "retiring", "backend_bound", "frontend_bound", "bad_speculation", nullptr }),
+          m_res_all({ "retiring",
+                      "backend_bound",
+                      "frontend_bound",
+                      "bad_speculation",
+                      "branch_mispredict",
+                      "machine_clears",
+                      "frontend_latency",
+                      "frontend_bandwidth",
+                      "memory_bound",
+                      "core_bound",
+                      "ext_mem_bound",
+                      "l1_bound",
+                      "l2_bound",
+                      "l3_bound",
+                      nullptr })
+    {}
+
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
 
@@ -139,15 +215,21 @@ class IntelTopdown
         double backend_bound  = 1.0 - (retiring + bad_speculation + frontend_bound);
 
         ret.reserve(4);
-        ret.push_back(Entry(result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-        ret.push_back(Entry(result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
-        ret.push_back(Entry(result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
-        ret.push_back(Entry(result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
+        ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+        ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
 
         return ret;
     }
 
-    std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec)
+    virtual std::size_t get_num_expected_toplevel() const override { return 4; }
+
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override { return {}; }
+
+    virtual std::size_t get_num_expected_retiring() const override { return 0; }
+
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
 
@@ -203,7 +285,9 @@ class IntelTopdown
         return ret;
     }
 
-    std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec)
+    virtual std::size_t get_num_expected_backend_bound() const override { return 6; }
+
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
 
@@ -227,7 +311,9 @@ class IntelTopdown
         return ret;
     }
 
-    std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec)
+    virtual std::size_t get_num_expected_frontend_bound() const override { return 2; }
+
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
 
@@ -251,11 +337,285 @@ class IntelTopdown
         return ret;
     }
 
+    virtual std::size_t get_num_expected_bad_speculation() const override { return 2; }
+};
+
+class SapphireRapidsTopdown
+{
+public:
+
+    SapphireRapidsTopdown(IntelTopdownLevel level)
+        : TopdownCalculator(level),
+          m_top_counters(
+              "perf::slots"
+              ",perf::topdown-retiring"
+              ",perf::topdown-bad-spec"
+              ",perf::topdown-fe-bound"
+              ",perf::topdown-be-bound"
+              ",INT_MISC:UOP_DROPPING"
+          ),
+          m_all_counters(
+              "perf::slots"
+              ",perf::topdown-retiring"
+              ",perf::topdown-bad-spec"
+              ",perf::topdown-fe-bound"
+              ",perf::topdown-be-bound"
+              ",INT_MISC:UOP_DROPPING"
+              ",perf_raw::r8400" // topdown-heavy-ops
+              ",perf_raw::r8500" // topdown-br-mispredict
+              ",perf_raw::r8600" // topdown-fetch-lat
+              ",perf_raw::r8700"
+          ), // topdown-mem-bound
+          m_res_top({ "retiring", "backend_bound", "frontend_bound", "bad_speculation", nullptr }),
+          m_res_all({ "retiring",
+                      "backend_bound",
+                      "frontend_bound",
+                      "bad_speculation",
+                      "branch_mispredict",
+                      "machine_clears",
+                      "frontend_latency",
+                      "frontend_bandwidth",
+                      "memory_bound",
+                      "core_bound",
+                      "light_ops",
+                      "heavy_ops",
+                      nullptr })
+    {}
+
+    // Computes the four top-level metrics (retiring, backend_bound, frontend_bound, bad_speculation) from
+    // the perf::topdown-* values normalized by their sum; returns an empty vector if the record is unusable.
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
+    {
+        std::vector<Entry> ret;
+
+        // Get PAPI metrics for toplevel calculations
+        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+
+        // A record is incomplete if any required counter is missing from it
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty();
+        // NOTE(review): this also rejects records where INT_MISC:UOP_DROPPING is exactly 0 -- confirm that
+        // skipping such records is intended.
+        bool is_nonzero = v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && v_bad_spec.to_double() > 0.0
+                          && v_retiring.to_double() > 0.0 && v_int_misc_uop_dropping.to_double() > 0.0
+                          && v_slots_or_info_thread_slots.to_double() > 0.0;
+
+        if (is_incomplete || !is_nonzero)
+            return ret;
+
+        // Perform toplevel calcs on plain doubles; Variant values are converted explicitly via to_double()
+        const double slots = v_slots_or_info_thread_slots.to_double();
+        double toplevel_sum =
+            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+
+        double retiring        = (v_retiring.to_double() / toplevel_sum) + (0 * slots);
+        double frontend_bound  = (v_fe_bound.to_double() / toplevel_sum) - (v_int_misc_uop_dropping.to_double() / slots);
+        double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * slots);
+        double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+        ret.reserve(4);
+        ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+        ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
+
+        return ret;
+    }
+
+    virtual std::size_t get_num_expected_toplevel() const override { return 4; }
+
+    // Splits the retiring fraction into heavy_ops (perf_raw::r8400) and light_ops (the remainder);
+    // returns an empty vector if any required counter is missing from the record.
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override
+    {
+        std::vector<Entry> ret;
+
+        // Get PAPI metrics for the retiring breakdown
+        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_heavy_ops                  = get_val_from_rec(rec, "perf_raw::r8400");
+
+        // Bail out if any required counter is missing from the record
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                             || v_slots_or_info_thread_slots.empty() || v_heavy_ops.empty();
+        if (is_incomplete)
+            return ret;
+
+        double toplevel_sum =
+            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+        // Copied from compute_toplevel
+        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+
+        double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+        double light_ops = std::max(0.0, retiring - heavy_ops);
+
+        // Add retiring sub-metrics to vector of Entry
+        ret.reserve(2);
+        ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
+        ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+
+        return ret;
+    }
+
+    virtual std::size_t get_num_expected_retiring() const override { return 2; }
+
+    // Splits the backend_bound fraction into memory_bound (perf_raw::r8700) and core_bound (the remainder);
+    // returns an empty vector if any required counter is missing from the record.
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override
+    {
+        std::vector<Entry> ret;
+
+        // Get PAPI metrics for the backend-bound breakdown
+        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_memory_bound               = get_val_from_rec(rec, "perf_raw::r8700");
+
+        // Bail out if any required counter is missing from the record
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                             || v_slots_or_info_thread_slots.empty() || v_memory_bound.empty();
+        if (is_incomplete)
+            return ret;
+
+        double toplevel_sum =
+            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+        // Copied from compute_toplevel
+        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+
+        double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+        double core_bound   = std::max(0.0, backend_bound - memory_bound);
+
+        // Emit only the two sub-metrics; backend_bound itself is already produced by compute_toplevel
+        ret.reserve(2);
+        ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+
+        return ret;
+    }
+
+    virtual std::size_t get_num_expected_backend_bound() const override { return 2; }
+
+    // Splits the frontend_bound fraction into frontend_latency (perf_raw::r8600) and frontend_bandwidth
+    // (the remainder); returns an empty vector if any required counter is missing from the record.
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override
+    {
+        std::vector<Entry> ret;
+
+        // Get PAPI metrics for the frontend-bound breakdown
+        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_fetch_latency              = get_val_from_rec(rec, "perf_raw::r8600");
+
+        // Bail out if any required counter is missing from the record
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
+                             || v_fetch_latency.empty();
+        if (is_incomplete)
+            return ret;
+
+        double toplevel_sum =
+            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+        // The same dropped-uops-per-slot correction is subtracted from both frontend terms
+        const double uop_dropping_per_slot =
+            v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double();
+
+        double frontend_bound  = (v_fe_bound.to_double() / toplevel_sum) - uop_dropping_per_slot;
+        double fetch_latency   = (v_fetch_latency.to_double() / toplevel_sum) - uop_dropping_per_slot;
+        double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
+
+        // Add frontend sub-metrics to vector of Entry
+        ret.reserve(2);
+        ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0))));
+        ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0))));
+
+        return ret;
+    }
+
+    virtual std::size_t get_num_expected_frontend_bound() const override { return 2; }
+
+    // Splits the bad_speculation fraction into branch_mispredict (perf_raw::r8500) and machine_clears
+    // (the remainder); returns an empty vector if any required counter is missing from the record.
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override
+    {
+        std::vector<Entry> ret;
+
+        // Get PAPI metrics for the bad-speculation breakdown
+        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_branch_mispredict          = get_val_from_rec(rec, "perf_raw::r8500");
+
+        // Bail out if any required counter is missing from the record
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
+                             || v_branch_mispredict.empty();
+        if (is_incomplete)
+            return ret;
+
+        // Recompute the toplevel values (same formulas as compute_toplevel) on plain doubles
+        const double slots = v_slots_or_info_thread_slots.to_double();
+        double toplevel_sum =
+            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+
+        double retiring        = (v_retiring.to_double() / toplevel_sum) + (0 * slots);
+        double frontend_bound  = (v_fe_bound.to_double() / toplevel_sum) - (v_int_misc_uop_dropping.to_double() / slots);
+        double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * slots);
+        double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+        // machine_clears is whatever part of bad_speculation is not attributed to branch mispredicts
+        double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) + (0 * slots);
+        double machine_clears    = std::max(0.0, bad_speculation - branch_mispredict);
+
+        // Add bad-speculation sub-metrics to vector of Entry
+        ret.reserve(2);
+        ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0))));
+        ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(machine_clears, 0.0))));
+
+        return ret;
+    }
+
+    virtual std::size_t get_num_expected_bad_speculation() const override { return 2; }
+};
+
+class IntelTopdown
+{
+    unsigned num_top_computed;
+    unsigned num_top_skipped;
+    unsigned num_be_computed;
+    unsigned num_be_skipped;
+    unsigned num_fe_computed;
+    unsigned num_fe_skipped;
+    unsigned num_bsp_computed;
+    unsigned num_bsp_skipped;
+    unsigned num_ret_computed;
+    unsigned num_ret_skipped;
+
+    IntelTopdownLevel level;
+
+    TopdownCalculator* m_calculator;
+
     void postprocess_snapshot_cb(std::vector<Entry>& rec)
     {
-        std::vector<Entry> result = compute_toplevel(rec);
+        std::vector<Entry> result = m_calculator->compute_toplevel(rec);
 
-        if (result.empty())
+        if (result.size() != m_calculator->get_num_expected_toplevel())
             ++num_top_skipped;
         else {
             rec.insert(rec.end(), result.begin(), result.end());
@@ -263,144 +623,141 @@ class IntelTopdown
         }
 
         if (level == All) {
-            result = compute_backend_bound(rec);
+            result = m_calculator->compute_backend_bound(rec);
 
-            if (result.empty())
+            if (result.size() != m_calculator->get_num_expected_backend_bound())
                 ++num_be_skipped;
             else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_be_computed;
             }
 
-            result = compute_frontend_bound(rec);
+            result = m_calculator->compute_frontend_bound(rec);
 
-            if (result.empty())
+            if (result.size() != m_calculator->get_num_expected_frontend_bound())
                 ++num_fe_skipped;
             else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_fe_computed;
             }
 
-            result = compute_bad_speculation(rec);
+            result = m_calculator->compute_bad_speculation(rec);
 
-            if (result.empty())
+            if (result.size() != m_calculator->get_num_expected_bad_speculation())
                 ++num_bsp_skipped;
             else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_bsp_computed;
             }
+
+            result = m_calculator->compute_retiring(rec);
+
+            if (result.size() != m_calculator->get_num_expected_retiring())
+                ++num_ret_skipped;
+            else {
+                rec.insert(rec.end(), result.begin(), result.end());
+                ++num_ret_computed;
+            }
         }
-    }
 
-    void finish_cb(Caliper* c, Channel* channel)
-    {
-        Log(1).stream() << channel->name() << ": topdown: Computed topdown metrics for " << num_top_computed
-                        << " records, skipped " << num_top_skipped << std::endl;
-
-        if (Log::verbosity() >= 2) {
-            Log(2).stream() << channel->name() << ": topdown: Records processed per topdown level: "
-                            << "\n  top:      " << num_top_computed << " computed, " << num_top_skipped << " skipped,"
-                            << "\n  bad spec: " << num_bsp_computed << " computed, " << num_bsp_skipped << " skipped,"
-                            << "\n  frontend: " << num_bsp_computed << " computed, " << num_bsp_skipped << " skipped,"
-                            << "\n  backend:  " << num_bsp_computed << " computed, " << num_bsp_skipped << " skipped."
-                            << std::endl;
-
-            if (!counters_not_found.empty()) {
-                std::ostringstream os;
-                for (auto& p : counters_not_found)
-                    os << "\n  " << p.first << ": " << p.second;
-                Log(2).stream() << channel->name() << ": topdown: Counters not found:" << os.str() << std::endl;
+        // Logs how many records were computed/skipped per topdown category and which counters were missing.
+        void finish_cb(Caliper * c, Channel * channel)
+        {
+            Log(1).stream() << channel->name() << ": topdown: Computed topdown metrics for " << num_top_computed
+                            << " records, skipped " << num_top_skipped << std::endl;
+
+            if (Log::verbosity() >= 2) {
+                Log(2).stream() << channel->name() << ": topdown: Records processed per topdown level: "
+                                << "\n  top:      " << num_top_computed << " computed, " << num_top_skipped
+                                << " skipped,"
+                                << "\n  bad spec: " << num_bsp_computed << " computed, " << num_bsp_skipped
+                                << " skipped,"
+                                << "\n  frontend: " << num_fe_computed << " computed, " << num_fe_skipped
+                                << " skipped,"
+                                << "\n  backend:  " << num_be_computed << " computed, " << num_be_skipped
+                                << " skipped." << std::endl;
+
+                const std::map<std::string, int>& counters_not_found = m_calculator->get_counters_not_found();
+                if (!counters_not_found.empty()) {
+                    std::ostringstream os;
+                    for (auto& p : counters_not_found)
+                        os << "\n  " << p.first << ": " << p.second;
+                    Log(2).stream() << channel->name() << ": topdown: Counters not found:" << os.str() << std::endl;
+                }
             }
         }
-    }
 
-    explicit IntelTopdown(Level lvl)
-        : num_top_computed(0),
-          num_top_skipped(0),
-          num_be_computed(0),
-          num_be_skipped(0),
-          num_fe_computed(0),
-          num_fe_skipped(0),
-          num_bsp_computed(0),
-          num_bsp_skipped(0),
-          level(lvl)
-    {}
+        // Takes ownership of calculator (released in the destructor). Every computed/skipped counter,
+        // including the retiring pair, starts at zero because postprocess_snapshot_cb() only ever
+        // increments them.
+        explicit IntelTopdown(TopdownCalculator * calculator)
+            : num_top_computed(0), num_top_skipped(0),
+              num_be_computed(0), num_be_skipped(0),
+              num_fe_computed(0), num_fe_skipped(0),
+              num_bsp_computed(0), num_bsp_skipped(0),
+              num_ret_computed(0), num_ret_skipped(0),
+              level(calculator->get_level()),
+              m_calculator(calculator)
+        {}
+
+        ~IntelTopdown()
+        {
+            if (m_calculator != nullptr) {
+                delete m_calculator;
+            }
+        }
 
-public:
+    public:
 
-    static const char* s_spec;
+        static const char* s_spec;
 
-    static void intel_topdown_register(Caliper* c, Channel* channel)
-    {
-        Level       level    = Top;
-        const char* counters = s_top_counters;
-
-        auto        config = services::init_config_from_spec(channel->config(), s_spec);
-        std::string lvlcfg = config.get("level").to_string();
-
-        if (lvlcfg == "all") {
-            level    = All;
-            counters = s_all_counters;
-        } else if (lvlcfg != "top") {
-            Log(0).stream() << channel->name() << ": topdown: Unknown level \"" << lvlcfg << "\", skipping topdown"
-                            << std::endl;
-            return;
-        }
+        // Registers the topdown service: reads "level", creates the calculator, enables PAPI, connects callbacks.
+        static void intel_topdown_register(Caliper * c, Channel * channel)
+        {
+            IntelTopdownLevel level = Top;
 
-        channel->config().set("CALI_PAPI_COUNTERS", counters);
+            auto        config = services::init_config_from_spec(channel->config(), s_spec);
+            std::string lvlcfg = config.get("level").to_string();
 
-        if (!cali::services::register_service(c, channel, "papi")) {
-            Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
-                            << std::endl;
-            return;
-        }
+            if (lvlcfg == "all") {
+                level = All;
+            } else if (lvlcfg != "top") {
+                Log(0).stream() << channel->name() << ": topdown: Unknown level \"" << lvlcfg << "\", skipping topdown"
+                                << std::endl;
+                return;
+            }
 
-        IntelTopdown* instance = new IntelTopdown(level);
-
-        channel->events().pre_flush_evt.connect([instance](Caliper* c, Channel* channel, SnapshotView) {
-            if (instance->find_counter_attrs(*c))
-                instance->make_result_attrs(*c);
-            else
-                Log(0).stream() << channel->name() << ": topdown: Could not find counter attributes!" << std::endl;
-        });
-        channel->events().postprocess_snapshot.connect([instance](Caliper*, Channel*, std::vector<Entry>& rec) {
-            instance->postprocess_snapshot_cb(rec);
-        });
-        channel->events().finish_evt.connect([instance](Caliper* c, Channel* channel) {
-            instance->finish_cb(c, channel);
-            delete instance;
-        });
-
-        Log(1).stream() << channel->name() << ": Registered topdown service. Level: " << lvlcfg << "." << std::endl;
-    }
-};
+            // TODO Add logic to select correct TopdownCalculator
+            TopdownCalculator* calculator = new HaswellTopdown(level);
+            channel->config().set("CALI_PAPI_COUNTERS", calculator->get_counters());
+            if (!cali::services::register_service(c, channel, "papi")) {
+                Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
+                                << std::endl;
+                delete calculator; // not yet owned by an IntelTopdown instance, so release it here
+                return;
+            }
 
-const char* IntelTopdown::s_top_counters =
-    "CPU_CLK_THREAD_UNHALTED:THREAD_P"
-    ",IDQ_UOPS_NOT_DELIVERED:CORE"
-    ",INT_MISC:RECOVERY_CYCLES"
-    ",UOPS_ISSUED:ANY"
-    ",UOPS_RETIRED:RETIRE_SLOTS";
-
-const char* IntelTopdown::s_all_counters =
-    "BR_MISP_RETIRED:ALL_BRANCHES"
-    ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
-    ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
-    ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
-    ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
-    ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
-    ",IDQ_UOPS_NOT_DELIVERED:CORE"
-    ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
-    ",INT_MISC:RECOVERY_CYCLES"
-    ",MACHINE_CLEARS:COUNT"
-    ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
-    ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
-    ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
-    ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
-    ",UOPS_ISSUED:ANY"
-    ",UOPS_RETIRED:RETIRE_SLOTS";
-
-const char* IntelTopdown::s_spec = R"json(
+            IntelTopdown* instance = new IntelTopdown(calculator);
+
+            channel->events().pre_flush_evt.connect([instance](Caliper* c, Channel* channel, SnapshotView) {
+                if (instance->m_calculator->find_counter_attrs(*c))
+                    instance->m_calculator->make_result_attrs(*c);
+                else
+                    Log(0).stream() << channel->name() << ": topdown: Could not find counter attributes!" << std::endl;
+            });
+            channel->events().postprocess_snapshot.connect([instance](Caliper*, Channel*, std::vector<Entry>& rec) {
+                instance->postprocess_snapshot_cb(rec);
+            });
+            channel->events().finish_evt.connect([instance](Caliper* c, Channel* channel) {
+                instance->finish_cb(c, channel);
+                delete instance;
+            });
+
+            Log(1).stream() << channel->name() << ": Registered topdown service. Level: " << lvlcfg << "." << std::endl;
+        }
+    };
+
+    const char* IntelTopdown::s_spec = R"json(
 {   "name": "topdown",
     "description": "Record PAPI counters and compute top-down analysis for Intel CPUs",
     "config": [
@@ -416,8 +773,8 @@ const char* IntelTopdown::s_spec = R"json(
 } // namespace
 
 namespace cali
-{
 
-CaliperService topdown_service { ::IntelTopdown::s_spec, ::IntelTopdown::intel_topdown_register };
+{
 
+    CaliperService topdown_service { ::IntelTopdown::s_spec, ::IntelTopdown::intel_topdown_register };
 }

From cabc803f2128f9853f68f2dc1157afe500a1e0ca Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Sat, 20 Jul 2024 17:49:54 -0700
Subject: [PATCH 02/11] Adds infrastructure to update builtin_option_specs
 based on features like architecture support

---
 src/caliper/ConfigManager.cpp           | 34 ++++++++++++-------------
 src/caliper/controllers/controllers.cpp |  1 +
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/caliper/ConfigManager.cpp b/src/caliper/ConfigManager.cpp
index 97340782..98944ab2 100644
--- a/src/caliper/ConfigManager.cpp
+++ b/src/caliper/ConfigManager.cpp
@@ -7,8 +7,8 @@
 
 #include "caliper/common/Log.h"
 
-#include "../src/common/util/parse_util.h"
 #include "../src/common/util/format_util.h"
+#include "../src/common/util/parse_util.h"
 
 #include "../services/Services.h"
 
@@ -36,46 +36,43 @@ extern const char* builtin_umpire_option_specs;
 extern const char* builtin_papi_option_specs;
 extern const char* builtin_kokkos_option_specs;
 
-extern void add_submodule_controllers_and_services();
+extern const char* get_builtin_option_specs();
+extern void        add_submodule_controllers_and_services();
 
 } // namespace cali
 
 namespace
 {
 
-const char* builtin_option_specs_list[] =
-{
-    builtin_base_option_specs,
+const char* builtin_option_specs_list[] = { builtin_base_option_specs,
 #ifdef CALIPER_HAVE_GOTCHA
-    builtin_gotcha_option_specs,
+                                            builtin_gotcha_option_specs,
 #endif
 #ifdef CALIPER_HAVE_MPI
-    builtin_mpi_option_specs,
+                                            builtin_mpi_option_specs,
 #endif
 #ifdef CALIPER_HAVE_OMPT
-    builtin_openmp_option_specs,
+                                            builtin_openmp_option_specs,
 #endif
 #ifdef CALIPER_HAVE_CUPTI
-    builtin_cuda_option_specs,
+                                            builtin_cuda_option_specs,
 #endif
 #if defined(CALIPER_HAVE_ROCTRACER) || defined(CALIPER_HAVE_ROCPROFILER)
-    builtin_rocm_option_specs,
+                                            builtin_rocm_option_specs,
 #endif
 #ifdef CALIPER_HAVE_LIBDW
-    builtin_libdw_option_specs,
+                                            builtin_libdw_option_specs,
 #endif
 #ifdef CALIPER_HAVE_PAPI
-    builtin_papi_option_specs,
+                                            builtin_papi_option_specs,
 #endif
 #ifdef CALIPER_HAVE_PCP
-    builtin_pcp_option_specs,
+                                            builtin_pcp_option_specs,
 #endif
 #ifdef CALIPER_HAVE_UMPIRE
-    builtin_umpire_option_specs,
+                                            builtin_umpire_option_specs,
 #endif
-    builtin_kokkos_option_specs,
-    nullptr
-};
+                                            builtin_kokkos_option_specs, nullptr };
 
 ChannelController* make_basic_channel_controller(
     const char*                   name,
@@ -139,7 +136,8 @@ ConfigManager::arglist_t merge_new_elements(ConfigManager::arglist_t& to, const
             return p.first == v.first;
         });
 
-        if (it == to.end() || p.first == "metadata") // hacky but we want to allow multiple entries for metadata
+        if (it == to.end() || p.first == "metadata") // hacky but we want to allow multiple entries
+                                                     // for metadata
             to.push_back(p);
     }
 
diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index 696b519d..929c0402 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -4,6 +4,7 @@
 #include "caliper/caliper-config.h"
 
 #include "caliper/ConfigManager.h"
+#include <cstring>
 
 namespace
 {

From c367be3f1f54aaea58ddeeeadf745c4e60f0473e Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Sat, 20 Jul 2024 18:26:06 -0700
Subject: [PATCH 03/11] Adds conditional behavior to topdown service and
 builtin option specs based on architecture specified at configure time

---
 CMakeLists.txt                          |   7 +
 caliper-config.h.in                     |   1 +
 src/caliper/ConfigManager.cpp           |  82 +++--
 src/caliper/controllers/controllers.cpp | 172 ++++++++-
 src/services/papi/Papi.cpp              |  32 +-
 src/services/topdown/IntelTopdown.cpp   | 461 +++++++++++++-----------
 6 files changed, 493 insertions(+), 262 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 528eb305..496d4c2d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,6 +83,12 @@ add_caliper_option(WITH_UMPIRE    "Enable Umpire statistics support" FALSE)
 add_caliper_option(WITH_CRAYPAT   "Enable CrayPAT region forwarding support" FALSE)
 add_caliper_option(WITH_LDMS      "Enable LDMS forwarder" FALSE)
 
+set(WITH_ARCH "" CACHE STRING "Enable features specific to the provided archspec CPU architecture name")
+if (NOT WITH_ARCH STREQUAL "") # leave CALIPER_HAVE_ARCH unset when no architecture was requested
+  string(TOLOWER "${WITH_ARCH}" LOWER_WITH_ARCH) # quoted so the value is always passed as a single argument
+  set(CALIPER_HAVE_ARCH "${LOWER_WITH_ARCH}") # lower-cased name, substituted into caliper-config.h
+endif ()
+
 add_caliper_option(USE_EXTERNAL_GOTCHA "Use pre-installed gotcha instead of building our own" FALSE)
 
 add_caliper_option(ENABLE_HISTOGRAMS "Enable histogram aggregation (experimental)" FALSE)
@@ -633,6 +639,7 @@ message(STATUS "Build type                : ${CMAKE_BUILD_TYPE}")
 message(STATUS "Compiler                  : ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} (${CMAKE_CXX_COMPILER})")
 message(STATUS "Python interpreter        : ${Python_EXECUTABLE}")
 message(STATUS "System                    : ${CMAKE_SYSTEM} (${CMAKE_SYSTEM_PROCESSOR})")
+message(STATUS "Architecture              : ${CALIPER_HAVE_ARCH}")
 message(STATUS "Install dir               : ${CMAKE_INSTALL_PREFIX}")
 message(STATUS "Build shared libs         : ${BUILD_SHARED_LIBS}")
 message(STATUS "Build Caliper tools       : ${WITH_TOOLS}")
diff --git a/caliper-config.h.in b/caliper-config.h.in
index 0ead0425..d5602e97 100644
--- a/caliper-config.h.in
+++ b/caliper-config.h.in
@@ -26,6 +26,7 @@
 #cmakedefine CALIPER_HAVE_UMPIRE
 #cmakedefine CALIPER_HAVE_CRAYPAT
 #cmakedefine CALIPER_HAVE_LDMS
+#cmakedefine CALIPER_HAVE_ARCH "@CALIPER_HAVE_ARCH@"
 
 #cmakedefine CALIPER_REDUCED_CONSTEXPR_USAGE
 
diff --git a/src/caliper/ConfigManager.cpp b/src/caliper/ConfigManager.cpp
index 98944ab2..3d288ac1 100644
--- a/src/caliper/ConfigManager.cpp
+++ b/src/caliper/ConfigManager.cpp
@@ -33,47 +33,18 @@ extern const char* builtin_cuda_option_specs;
 extern const char* builtin_rocm_option_specs;
 extern const char* builtin_pcp_option_specs;
 extern const char* builtin_umpire_option_specs;
-extern const char* builtin_papi_option_specs;
 extern const char* builtin_kokkos_option_specs;
 
-extern const char* get_builtin_option_specs();
-extern void        add_submodule_controllers_and_services();
+extern const char* builtin_papi_hsw_option_specs;
+extern const char* builtin_papi_spr_option_specs;
+
+extern void add_submodule_controllers_and_services();
 
 } // namespace cali
 
 namespace
 {
 
-const char* builtin_option_specs_list[] = { builtin_base_option_specs,
-#ifdef CALIPER_HAVE_GOTCHA
-                                            builtin_gotcha_option_specs,
-#endif
-#ifdef CALIPER_HAVE_MPI
-                                            builtin_mpi_option_specs,
-#endif
-#ifdef CALIPER_HAVE_OMPT
-                                            builtin_openmp_option_specs,
-#endif
-#ifdef CALIPER_HAVE_CUPTI
-                                            builtin_cuda_option_specs,
-#endif
-#if defined(CALIPER_HAVE_ROCTRACER) || defined(CALIPER_HAVE_ROCPROFILER)
-                                            builtin_rocm_option_specs,
-#endif
-#ifdef CALIPER_HAVE_LIBDW
-                                            builtin_libdw_option_specs,
-#endif
-#ifdef CALIPER_HAVE_PAPI
-                                            builtin_papi_option_specs,
-#endif
-#ifdef CALIPER_HAVE_PCP
-                                            builtin_pcp_option_specs,
-#endif
-#ifdef CALIPER_HAVE_UMPIRE
-                                            builtin_umpire_option_specs,
-#endif
-                                            builtin_kokkos_option_specs, nullptr };
-
 ChannelController* make_basic_channel_controller(
     const char*                   name,
     const config_map_t&           initial_cfg,
@@ -901,6 +872,8 @@ struct ConfigManager::ConfigManagerImpl {
     bool        m_error     = false;
     std::string m_error_msg = "";
 
+    std::vector<const char*> builtin_option_specs_list;
+
     std::map<std::string, arglist_t> m_default_parameters_for_spec;
 
     arglist_t m_default_parameters;
@@ -1404,9 +1377,48 @@ struct ConfigManager::ConfigManagerImpl {
     }
 
     ConfigManagerImpl()
+        : builtin_option_specs_list({
+              builtin_base_option_specs,
+#ifdef CALIPER_HAVE_GOTCHA
+                  builtin_gotcha_option_specs,
+#endif
+#ifdef CALIPER_HAVE_MPI
+                  builtin_mpi_option_specs,
+#endif
+#ifdef CALIPER_HAVE_OMPT
+                  builtin_openmp_option_specs,
+#endif
+#ifdef CALIPER_HAVE_CUPTI
+                  builtin_cuda_option_specs,
+#endif
+#if defined(CALIPER_HAVE_ROCTRACER) || defined(CALIPER_HAVE_ROCPROFILER)
+                  builtin_rocm_option_specs,
+#endif
+#ifdef CALIPER_HAVE_LIBDW
+                  builtin_libdw_option_specs,
+#endif
+#ifdef CALIPER_HAVE_PCP
+                  builtin_pcp_option_specs,
+#endif
+#ifdef CALIPER_HAVE_UMPIRE
+                  builtin_umpire_option_specs,
+#endif
+                  builtin_kokkos_option_specs
+          })
     {
-        for (const char** spec_p = builtin_option_specs_list; *spec_p; ++spec_p)
-            add_global_option_specs(*spec_p);
+#ifdef CALIPER_HAVE_PAPI
+        // Select the PAPI top-down option specs: the Sapphire Rapids set when that architecture
+        // was configured, otherwise the Haswell/Broadwell set.
+#ifdef CALIPER_HAVE_ARCH
+        // Compare string contents; `==` between two string literals compares addresses (unspecified).
+        const bool is_spr = std::string(CALIPER_HAVE_ARCH) == "sapphirerapids";
+#else
+        const bool is_spr = false;
+#endif
+        builtin_option_specs_list.push_back(is_spr ? builtin_papi_spr_option_specs : builtin_papi_hsw_option_specs);
+#endif
+        for (const char* spec_p : builtin_option_specs_list)
+            add_global_option_specs(spec_p);
     }
 };
 
diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index 929c0402..5e70da15 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -207,6 +207,25 @@ const ConfigManager::ConfigInfo* builtin_controllers_table[] = { &cuda_activity_
                                                                  &spot_controller_info,
                                                                  nullptr };
 
+// Compile-time string equality test (note: returns true when EQUAL, unlike strcmp).
+// Based on code from:
+// https://gist.github.com/ac1dloop/4f7109e8856e5d28e769134bca7d6d7d
+constexpr bool const_strcmp(const char* a, const char* b)
+{
+    // Walk both strings in lockstep until BOTH have reached their terminator.
+    // If one string ends first, its '\0' differs from the other's character,
+    // so the mismatch branch fires before either pointer passes its end.
+    while (*a != '\0' || *b != '\0') {
+        const char lhs = *a;
+        const char rhs = *b;
+        ++a;
+        ++b;
+        if (lhs != rhs)
+            return false;
+    }
+    return true; // every position matched, including the terminators
+}
+
 const char* builtin_base_option_specs = R"json(
 [
 {
@@ -1062,7 +1081,7 @@ const char* builtin_pcp_option_specs = R"json(
 ]
 )json";
 
-const char* builtin_papi_option_specs = R"json(
+const char* builtin_papi_hsw_option_specs = R"json(
 [
 {
  "name"        : "topdown.toplevel",
@@ -1141,6 +1160,157 @@ const char* builtin_papi_option_specs = R"json(
 ]
 )json";
 
+const char* builtin_papi_spr_option_specs = R"json([
+    {
+     "name"        : "topdown.toplevel",
+     "description" : "Top-down analysis for Intel CPUs (top level)",
+     "type"        : "bool",
+     "category"    : "metric",
+     "services"    : [ "topdown" ],
+     "config"      : { "CALI_TOPDOWN_LEVEL": "top" },
+     "query"  :
+     [
+      { "level": "local", "select":
+       [
+        "any(topdown.retiring) as \"Retiring\"",
+        "any(topdown.backend_bound) as \"Backend bound\"",
+        "any(topdown.frontend_bound) as \"Frontend bound\"",
+        "any(topdown.bad_speculation) as \"Bad speculation\""
+       ]
+      },
+      { "level": "cross", "select":
+       [
+        "any(any#topdown.retiring) as \"Retiring\"",
+        "any(any#topdown.backend_bound) as \"Backend bound\"",
+        "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+        "any(any#topdown.bad_speculation) as \"Bad speculation\""
+       ]
+      }
+     ]
+    },
+    {
+     "name"        : "topdown.all",
+     "description" : "Top-down analysis for Intel CPUs (all levels)",
+     "type"        : "bool",
+     "category"    : "metric",
+     "services"    : [ "topdown" ],
+     "config"      : { "CALI_TOPDOWN_LEVEL": "all" },
+     "query"  :
+     [
+      { "level": "local", "select":
+       [
+        "any(topdown.retiring) as \"Retiring\"",
+        "any(topdown.backend_bound) as \"Backend bound\"",
+        "any(topdown.frontend_bound) as \"Frontend bound\"",
+        "any(topdown.bad_speculation) as \"Bad speculation\"",
+        "any(topdown.branch_mispredict) as \"Branch mispredict\"",
+        "any(topdown.machine_clears) as \"Machine clears\"",
+        "any(topdown.frontend_latency) as \"Frontend latency\"",
+        "any(topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+        "any(topdown.memory_bound) as \"Memory bound\"",
+        "any(topdown.core_bound) as \"Core bound\"",
+        "any(topdown.light_ops) as \"Light operations\"",
+        "any(topdown.heavy_ops) as \"Heavy operations\""
+       ]
+      },
+      { "level": "cross", "select":
+       [
+        "any(any#topdown.retiring) as \"Retiring\"",
+        "any(any#topdown.backend_bound) as \"Backend bound\"",
+        "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+        "any(any#topdown.bad_speculation) as \"Bad speculation\"",
+        "any(any#topdown.branch_mispredict) as \"Branch mispredict\"",
+        "any(any#topdown.machine_clears) as \"Machine clears\"",
+        "any(any#topdown.frontend_latency) as \"Frontend latency\"",
+        "any(any#topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+        "any(any#topdown.memory_bound) as \"Memory bound\"",
+        "any(any#topdown.core_bound) as \"Core bound\"",
+        "any(any#topdown.light_ops) as \"Light operations\"",
+        "any(any#topdown.heavy_ops) as \"Heavy operations\""
+       ]
+      }
+     ]
+    },
+    {
+     "name"        : "topdown-counters.toplevel",
+     "description" : "Raw counter values for Intel top-down analysis (top level)",
+     "type"        : "bool",
+     "category"    : "metric",
+     "services"    : [ "papi" ],
+     "config"      :
+     {
+       "CALI_PAPI_COUNTERS":
+         "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING"
+     },
+     "query"  :
+     [
+      { "level": "local", "select":
+       [
+        "inclusive_sum(sum#papi.perf::slots) as slots",
+        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
+        "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+        "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+        "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+        "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_misc:uop_dropping"
+       ]
+      },
+      { "level": "cross", "select":
+       [
+        "sum(inclusive#sum#papi.perf::slots) as slots",
+        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
+        "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+        "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+        "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+        "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_misc:uop_dropping"
+       ]
+      }
+     ]
+    },
+    {
+     "name"        : "topdown-counters.all",
+     "description" : "Raw counter values for Intel top-down analysis (all levels)",
+     "type"        : "bool",
+     "category"    : "metric",
+     "services"    : [ "papi" ],
+     "config"      :
+     {
+       "CALI_PAPI_COUNTERS":
+         "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING,perf_raw::r8400,perf_raw::r8500,perf_raw::r8600,perf_raw::r8700"
+     },
+     "query"  :
+     [
+      { "level": "local", "select":
+       [
+        "inclusive_sum(sum#papi.perf::slots) as slots",
+        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
+        "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+        "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+        "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+        "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_misc:uop_dropping",
+        "inclusive_sum(sum#papi.perf_raw::r8400) as topdown_heavy_ops",
+        "inclusive_sum(sum#papi.perf_raw::r8500) as topdown_br_mispredict",
+        "inclusive_sum(sum#papi.perf_raw::r8600) as topdown_fetch_lat",
+        "inclusive_sum(sum#papi.perf_raw::r8700) as topdown_mem_bound"
+       ]
+      },
+      { "level": "cross", "select":
+       [
+        "sum(inclusive#sum#papi.perf::slots) as slots",
+        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
+        "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+        "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+        "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+        "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_misc:uop_dropping",
+        "sum(inclusive#sum#papi.perf_raw::r8400) as topdown_heavy_ops",
+        "sum(inclusive#sum#papi.perf_raw::r8500) as topdown_br_mispredict",
+        "sum(inclusive#sum#papi.perf_raw::r8600) as topdown_fetch_lat",
+        "sum(inclusive#sum#papi.perf_raw::r8700) as topdown_mem_bound"
+       ]
+      }
+     ]
+    }
+  ])json"; // Sapphire Rapids PAPI top-down specs; a JSON array like builtin_papi_hsw_option_specs
+
 const char* builtin_kokkos_option_specs = R"json(
 [
 {
diff --git a/src/services/papi/Papi.cpp b/src/services/papi/Papi.cpp
index d2ff3f01..ac3f0d6d 100644
--- a/src/services/papi/Papi.cpp
+++ b/src/services/papi/Papi.cpp
@@ -11,8 +11,8 @@
 #include "caliper/Caliper.h"
 #include "caliper/SnapshotRecord.h"
 
-#include "caliper/common/RuntimeConfig.h"
 #include "caliper/common/Log.h"
+#include "caliper/common/RuntimeConfig.h"
 
 #include "../../common/util/spinlock.hpp"
 
@@ -73,7 +73,8 @@ class PapiService
     unsigned m_num_failed_start;
     unsigned m_num_threads;
 
-    // PAPI component id -> event group info map for constructing the per-thread PAPI EventSets
+    // PAPI component id -> event group info map for constructing the per-thread
+    // PAPI EventSets
     eventset_map_t m_event_groups;
 
     ThreadInfo*    m_thread_list;
@@ -168,18 +169,21 @@ class PapiService
 
             int num = static_cast<int>(p.second->codes.size());
 
-            if (cpi && (num > 4 /* magic number for Intel counter support :-( */ || m_enable_multiplex)) {
-                if (Log::verbosity() >= 2)
-                    Log(2).stream() << "papi: Initializing multiplex support for component " << p.first << " ("
-                                    << cpi->name << ")" << std::endl;
-
-                ret = PAPI_assign_eventset_component(eventset, p.first);
-                if (ret != PAPI_OK)
-                    print_papi_error("PAPI_assign_eventset_component", ret);
-                ret = PAPI_set_multiplex(eventset);
-                if (ret != PAPI_OK)
-                    print_papi_error("PAPI_set_multiplex", ret);
-            }
+            // if (cpi && (num > 4 /* magic number for Intel counter support :-( */ ||
+            // m_enable_multiplex)) {
+            //     if (Log::verbosity() >= 2)
+            //         Log(2).stream() << "papi: Initializing multiplex support for
+            //         component "
+            //                         << p.first << " (" << cpi->name << ")"
+            //                         << std::endl;
+
+            //     ret = PAPI_assign_eventset_component(eventset, p.first);
+            //     if (ret != PAPI_OK)
+            //         print_papi_error("PAPI_assign_eventset_component", ret);
+            //     ret = PAPI_set_multiplex(eventset);
+            //     if (ret != PAPI_OK)
+            //         print_papi_error("PAPI_set_multiplex", ret);
+            // }
 
             ret = PAPI_add_events(eventset, p.second->codes.data(), num);
             if (ret < 0) {
diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp
index 1724f561..1087c10e 100644
--- a/src/services/topdown/IntelTopdown.cpp
+++ b/src/services/topdown/IntelTopdown.cpp
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <map>
 #include <sstream>
+#include <vector>
 
 using namespace cali;
 
@@ -37,8 +38,8 @@ class TopdownCalculator
     const char* m_top_counters;
     const char* m_all_counters;
 
-    const char* s_res_top[];
-    const char* s_res_all[];
+    std::vector<const char*> m_res_top;
+    std::vector<const char*> m_res_all;
 
     std::map<std::string, Attribute> m_counter_attrs;
     std::map<std::string, Attribute> m_result_attrs;
@@ -49,8 +50,8 @@ class TopdownCalculator
     {
         Variant ret;
 
-        auto c_it = counter_attrs.find(name);
-        if (c_it == counter_attrs.end())
+        auto c_it = m_counter_attrs.find(name);
+        if (c_it == m_counter_attrs.end())
             return ret;
 
         cali_id_t attr_id = c_it->second.id();
@@ -65,10 +66,26 @@ class TopdownCalculator
         return ret;
     }
 
+    TopdownCalculator( // protected constructor used by the architecture-specific calculators
+        IntelTopdownLevel          level,        // stored in m_level
+        const char*                top_counters, // stored in m_top_counters
+        const char*                all_counters, // stored in m_all_counters
+        std::vector<const char*>&& res_top,      // result names used to fill m_res_top
+        std::vector<const char*>&& res_all       // result names used to fill m_res_all
+    )
+        : m_level(level),
+          m_top_counters(top_counters),
+          m_all_counters(all_counters),
+          m_res_top(res_top), // NOTE(review): a named rvalue reference is an lvalue, so this copies; std::move would avoid it
+          m_res_all(res_all)  // NOTE(review): same copy as m_res_top above
+    {}
+
 public:
 
     TopdownCalculator(IntelTopdownLevel level) : m_level(level) {}
 
+    virtual ~TopdownCalculator() = default;
+
     virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) = 0;
 
     virtual std::size_t get_num_expected_toplevel() const = 0;
@@ -112,14 +129,15 @@ class TopdownCalculator
 
     void make_result_attrs(CaliperMetadataAccessInterface& db)
     {
-        const char** res = (m_level == Top ? m_res_top : m_res_all);
+        std::vector<const char*>& res = (m_level == Top ? m_res_top : m_res_all);
 
-        for (const char** s = res; s && *s; ++s)
-            m_result_attrs[std::string(*s)] = db.create_attribute(
-                std::string("topdown.") + (*s),
+        for (const char* s : res) {
+            m_result_attrs[std::string(s)] = db.create_attribute(
+                std::string("topdown.") + s,
                 CALI_TYPE_DOUBLE,
                 CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS
             );
+        }
     }
 
     const std::map<std::string, int>& get_counters_not_found() const { return m_counters_not_found; }
@@ -136,55 +154,58 @@ class TopdownCalculator
     IntelTopdownLevel get_level() const { return m_level; }
 };
 
-class HaswellTopdown
+class HaswellTopdown : public TopdownCalculator
 {
 public:
 
     HaswellTopdown(IntelTopdownLevel level)
-        : TopdownCalculator(level),
-          m_top_counters(
-              "CPU_CLK_THREAD_UNHALTED:THREAD_P"
-              ",IDQ_UOPS_NOT_DELIVERED:CORE"
-              ",INT_MISC:RECOVERY_CYCLES"
-              ",UOPS_ISSUED:ANY"
-              ",UOPS_RETIRED:RETIRE_SLOTS"
-          ),
-          m_all_counters(
-              "BR_MISP_RETIRED:ALL_BRANCHES"
-              ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
-              ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
-              ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
-              ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
-              ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
-              ",IDQ_UOPS_NOT_DELIVERED:CORE"
-              ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
-              ",INT_MISC:RECOVERY_CYCLES"
-              ",MACHINE_CLEARS:COUNT"
-              ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
-              ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
-              ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
-              ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
-              ",UOPS_ISSUED:ANY"
-              ",UOPS_RETIRED:RETIRE_SLOTS"
-          ),
-          m_res_top({ "retiring", "backend_bound", "frontend_bound", "bad_speculation", nullptr }),
-          m_res_all({ "retiring",
-                      "backend_bound",
-                      "frontend_bound",
-                      "bad_speculation",
-                      "branch_mispredict",
-                      "machine_clears",
-                      "frontend_latency",
-                      "frontend_bandwidth",
-                      "memory_bound",
-                      "core_bound",
-                      "ext_mem_bound",
-                      "l1_bound",
-                      "l2_bound",
-                      "l3_bound",
-                      nullptr })
+        : TopdownCalculator(
+            level,
+            // top_counters
+            "CPU_CLK_THREAD_UNHALTED:THREAD_P"
+            ",IDQ_UOPS_NOT_DELIVERED:CORE"
+            ",INT_MISC:RECOVERY_CYCLES"
+            ",UOPS_ISSUED:ANY"
+            ",UOPS_RETIRED:RETIRE_SLOTS",
+            // all_counters
+            "BR_MISP_RETIRED:ALL_BRANCHES"
+            ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
+            ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
+            ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
+            ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
+            ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
+            ",IDQ_UOPS_NOT_DELIVERED:CORE"
+            ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
+            ",INT_MISC:RECOVERY_CYCLES"
+            ",MACHINE_CLEARS:COUNT"
+            ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
+            ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
+            ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
+            ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
+            ",UOPS_ISSUED:ANY"
+            ",UOPS_RETIRED:RETIRE_SLOTS",
+            // res_top
+            { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+            // res_all
+            { "retiring",
+              "backend_bound",
+              "frontend_bound",
+              "bad_speculation",
+              "branch_mispredict",
+              "machine_clears",
+              "frontend_latency",
+              "frontend_bandwidth",
+              "memory_bound",
+              "core_bound",
+              "ext_mem_bound",
+              "l1_bound",
+              "l2_bound",
+              "l3_bound" }
+        )
     {}
 
+    virtual ~HaswellTopdown() = default;
+
     virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
@@ -340,59 +361,62 @@ class HaswellTopdown
     virtual std::size_t get_num_expected_bad_speculation() const override { return 2; }
 };
 
-class SapphireRapidsTopdown
+class SapphireRapidsTopdown : public TopdownCalculator
 {
 public:
 
     SapphireRapidsTopdown(IntelTopdownLevel level)
-        : TopdownCalculator(level),
-          m_top_counters(
-              "perf::slots"
-              ",perf::topdown-retiring"
-              ",perf::topdown-bad-spec"
-              ",perf::topdown-fe-bound"
-              ",perf::topdown-be-bound"
-              ",INT_MISC:UOP_DROPPING"
-          ),
-          m_all_counters(
-              "perf::slots"
-              ",perf::topdown-retiring"
-              ",perf::topdown-bad-spec"
-              ",perf::topdown-fe-bound"
-              ",perf::topdown-be-bound"
-              ",INT_MISC:UOP_DROPPING"
-              ",perf_raw::r8400" // topdown-heavy-ops
-              ",perf_raw::r8500" // topdown-br-mispredict
-              ",perf_raw::r8600" // topdown-fetch-lat
-              ",perf_raw::r8700"
-          ), // topdown-mem-bound
-          m_res_top({ "retiring", "backend_bound", "frontend_bound", "bad_speculation", nullptr }),
-          m_res_all({ "retiring",
-                      "backend_bound",
-                      "frontend_bound",
-                      "bad_speculation",
-                      "branch_mispredict",
-                      "machine_clears",
-                      "frontend_latency",
-                      "frontend_bandwidth",
-                      "memory_bound",
-                      "core_bound",
-                      "light_ops",
-                      "heavy_ops",
-                      nullptr })
+        : TopdownCalculator(
+            level,
+            // top_counters
+            "perf::slots"
+            ",perf::topdown-retiring"
+            ",perf::topdown-bad-spec"
+            ",perf::topdown-fe-bound"
+            ",perf::topdown-be-bound"
+            ",INT_MISC:UOP_DROPPING",
+            // all_counters
+            "perf::slots"
+            ",perf::topdown-retiring"
+            ",perf::topdown-bad-spec"
+            ",perf::topdown-fe-bound"
+            ",perf::topdown-be-bound"
+            ",INT_MISC:UOP_DROPPING"
+            ",perf_raw::r8400"  // topdown-heavy-ops
+            ",perf_raw::r8500"  // topdown-br-mispredict
+            ",perf_raw::r8600"  // topdown-fetch-lat
+            ",perf_raw::r8700", // topdown-mem-bound
+            // res_top
+            { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+            // res_all
+            { "retiring",
+              "backend_bound",
+              "frontend_bound",
+              "bad_speculation",
+              "branch_mispredict",
+              "machine_clears",
+              "frontend_latency",
+              "frontend_bandwidth",
+              "memory_bound",
+              "core_bound",
+              "light_ops",
+              "heavy_ops" }
+        )
     {}
 
+    virtual ~SapphireRapidsTopdown() = default;
+
     virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
     {
         std::vector<Entry> ret;
 
         // Get PAPI metrics for toplevel calculations
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
         Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
         Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
         Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MSC:UOP_DROPPING");
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
 
         // Check if any Variant is empty (use .empty())
         bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
@@ -411,10 +435,10 @@ class SapphireRapidsTopdown
         double toplevel_sum =
             (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
 
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
-        double frontend_bound =
-            (v_fe_bound.to_double() / toplevel_sum) - (v_int_misc_uop_dropping / v_slots_or_info_thread_slots);
-        double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
+        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
         double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
 
         // Add toplevel metrics to vector of Entry
@@ -434,11 +458,11 @@ class SapphireRapidsTopdown
         std::vector<Entry> ret;
 
         // Get PAPI metrics for toplevel calculations
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
         Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
         Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
         Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_heavy_ops                  = get_val_from_rec(rec, "perf_raw::r8400");
 
         // Check if any Variant is empty (use .empty())
@@ -452,9 +476,9 @@ class SapphireRapidsTopdown
         double toplevel_sum =
             (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
         // Copied from compute_toplevel
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
 
-        double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+        double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
         double light_ops = std::max(0.0, retiring - heavy_ops);
 
         // Add toplevel metrics to vector of Entry
@@ -472,11 +496,11 @@ class SapphireRapidsTopdown
         std::vector<Entry> ret;
 
         // Get PAPI metrics for toplevel calculations
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
         Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
         Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
         Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_memory_bound               = get_val_from_rec(rec, "perf_raw::r8700");
 
         // Check if any Variant is empty (use .empty())
@@ -490,15 +514,16 @@ class SapphireRapidsTopdown
         double toplevel_sum =
             (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
         // Copied from compute_toplevel
-        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
 
-        double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
-        double core_bound   = std::max(0.0, backend_bound - memory_bound);
+        double memory_bound =
+            (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+        double core_bound = std::max(0.0, backend_bound - memory_bound);
 
         // Add toplevel metrics to vector of Entry
         ret.reserve(2);
-        ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
         ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0))));
+        ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
 
         return ret;
     }
@@ -510,18 +535,18 @@ class SapphireRapidsTopdown
         std::vector<Entry> ret;
 
         // Get PAPI metrics for toplevel calculations
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
         Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
         Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
         Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MSC:UOP_DROPPING");
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
         Variant v_fetch_latency              = get_val_from_rec(rec, "perf_raw::r8600");
 
         // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
+        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
                              || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
-                             || v_memory_bound.empty();
+                             || v_fetch_latency.empty();
 
         // Check if bad values were obtained
         if (is_incomplete)
@@ -530,11 +555,12 @@ class SapphireRapidsTopdown
         double toplevel_sum =
             (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
         // Copied from compute_toplevel
-        double frontend_bound =
-            (v_fe_bound.to_double() / toplevel_sum) - (v_int_misc_uop_dropping / v_slots_or_info_thread_slots);
+        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
+
+        double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum)
+                               - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
 
-        double fetch_latency =
-            (v_fetch_latency.to_double() / toplevel_sum) - (v_int_misc_uop_dropping * v_slots_or_info_thread_slots);
         double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
 
         // Add toplevel metrics to vector of Entry
@@ -552,12 +578,12 @@ class SapphireRapidsTopdown
         std::vector<Entry> ret;
 
         // Get PAPI metrics for toplevel calculations
+        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
         Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
         Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
         Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
         Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MSC:UOP_DROPPING");
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
         Variant v_branch_mispredict          = get_val_from_rec(rec, "perf_raw::r8500");
 
         // Check if any Variant is empty (use .empty())
@@ -573,14 +599,14 @@ class SapphireRapidsTopdown
         double toplevel_sum =
             (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
 
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
-        double frontend_bound =
-            (v_fe_bound.to_double() / toplevel_sum) - (v_int_misc_uop_dropping / v_slots_or_info_thread_slots);
-        double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
+        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
         double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
 
         double branch_mispredict =
-            (v_branch_mispredict.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots);
+            (v_branch_mispredict.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
         double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
 
         // Add toplevel metrics to vector of Entry
@@ -607,157 +633,168 @@ class IntelTopdown
     unsigned num_ret_computed;
     unsigned num_ret_skipped;
 
-    IntelTopdownLevel level;
+    IntelTopdownLevel m_level;
 
     TopdownCalculator* m_calculator;
 
+    bool find_counter_attrs(CaliperMetadataAccessInterface& db) { return m_calculator->find_counter_attrs(db); } // forwards to the calculator
+
+    void make_result_attrs(CaliperMetadataAccessInterface& db) { m_calculator->make_result_attrs(db); } // forwards to the calculator
+
     void postprocess_snapshot_cb(std::vector<Entry>& rec)
     {
         std::vector<Entry> result = m_calculator->compute_toplevel(rec);
 
-        if (result.size() != m_calculator->get_num_expected_toplevel())
+        if (result.size() != m_calculator->get_num_expected_toplevel()) { // unexpected entry count: nothing is appended
             ++num_top_skipped;
-        else {
+        } else {
             rec.insert(rec.end(), result.begin(), result.end());
             ++num_top_computed;
         }
 
-        if (level == All) {
+        if (m_level == All) { // the sub-level metrics below are only computed for level All
             result = m_calculator->compute_backend_bound(rec);
 
-            if (result.size() != m_calculator->get_num_expected_backend_bound())
+            if (result.size() != m_calculator->get_num_expected_backend_bound()) {
                 ++num_be_skipped;
-            else {
+            } else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_be_computed;
             }
 
             result = m_calculator->compute_frontend_bound(rec);
 
-            if (result.size() != m_calculator->get_num_expected_frontend_bound())
+            if (result.size() != m_calculator->get_num_expected_frontend_bound()) {
                 ++num_fe_skipped;
-            else {
+            } else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_fe_computed;
             }
 
             result = m_calculator->compute_bad_speculation(rec);
 
-            if (result.size() != m_calculator->get_num_expected_bad_speculation())
+            if (result.size() != m_calculator->get_num_expected_bad_speculation()) {
                 ++num_bsp_skipped;
-            else {
+            } else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_bsp_computed;
             }
 
             result = m_calculator->compute_retiring(rec);
 
-            if (result.size() != m_calculator->get_num_expected_retiring())
+            if (result.size() != m_calculator->get_num_expected_retiring()) {
                 ++num_ret_skipped;
-            else {
+            } else {
                 rec.insert(rec.end(), result.begin(), result.end());
                 ++num_ret_computed;
             }
         }
+    }
 
-        void finish_cb(Caliper * c, Channel * channel)
-        {
-            Log(1).stream() << channel->name() << ": topdown: Computed topdown metrics for " << num_top_computed
-                            << " records, skipped " << num_top_skipped << std::endl;
-
-            if (Log::verbosity() >= 2) {
-                Log(2).stream() << channel->name() << ": topdown: Records processed per topdown level: "
-                                << "\n  top:      " << num_top_computed << " computed, " << num_top_skipped
-                                << " skipped,"
-                                << "\n  bad spec: " << num_bsp_computed << " computed, " << num_bsp_skipped
-                                << " skipped,"
-                                << "\n  frontend: " << num_bsp_computed << " computed, " << num_bsp_skipped
-                                << " skipped,"
-                                << "\n  backend:  " << num_bsp_computed << " computed, " << num_bsp_skipped
-                                << " skipped." << std::endl;
-
-                const std::map<std::string, int>& counters_not_found = m_calculator->get_counters_not_found();
-
-                if (!counters_not_found.empty()) {
-                    std::ostringstream os;
-                    for (auto& p : counters_not_found)
-                        os << "\n  " << p.first << ": " << p.second;
-                    Log(2).stream() << channel->name() << ": topdown: Counters not found:" << os.str() << std::endl;
-                }
+    void finish_cb(Caliper* c, Channel* channel) // logs record statistics for this channel; c is unused
+    {
+        Log(1).stream() << channel->name() << ": topdown: Computed topdown metrics for " << num_top_computed
+                        << " records, skipped " << num_top_skipped << std::endl;
+        // The per-level breakdown and the missing-counter report are only written at verbosity >= 2.
+        if (Log::verbosity() >= 2) {
+            Log(2).stream() << channel->name() << ": topdown: Records processed per topdown level: "
+                            << "\n  top:      " << num_top_computed << " computed, " << num_top_skipped << " skipped,"
+                            << "\n  bad spec: " << num_bsp_computed << " computed, " << num_bsp_skipped << " skipped,"
+                            << "\n  frontend: " << num_fe_computed << " computed, " << num_fe_skipped << " skipped,"
+                            << "\n  backend:  " << num_be_computed << " computed, " << num_be_skipped << " skipped."
+                            << std::endl;
+            // Per-counter tally kept by the calculator for counters it could not find in a record.
+            const std::map<std::string, int>& counters_not_found = m_calculator->get_counters_not_found();
+
+            if (!counters_not_found.empty()) {
+                std::ostringstream os;
+                for (auto& p : counters_not_found)
+                    os << "\n  " << p.first << ": " << p.second;
+                Log(2).stream() << channel->name() << ": topdown: Counters not found:" << os.str() << std::endl;
             }
         }
+    }
 
-        explicit IntelTopdown(TopdownCalculator * calculator)
-            : num_top_computed(0),
-              num_top_skipped(0),
-              num_be_computed(0),
-              num_be_skipped(0),
-              num_fe_computed(0),
-              num_fe_skipped(0),
-              num_bsp_computed(0),
-              num_bsp_skipped(0),
-              level(calculator->get_level()),
-              m_calculator(calculator)
-        {}
-
-        ~IntelTopdown()
-        {
-            if (m_calculator != nullptr) {
-                delete m_calculator;
-            }
+    // Takes ownership of calculator (it is deleted in ~IntelTopdown()) and caches its level.
+    // calculator is dereferenced here, so it must not be null.
+    explicit IntelTopdown(TopdownCalculator* calculator)
+        : num_top_computed(0), num_top_skipped(0),
+          num_be_computed(0), num_be_skipped(0),
+          num_fe_computed(0), num_fe_skipped(0),
+          num_bsp_computed(0), num_bsp_skipped(0),
+          // incremented by postprocess_snapshot_cb(), so they need a defined start value too
+          num_ret_computed(0), num_ret_skipped(0),
+          m_level(calculator->get_level()),
+          m_calculator(calculator)
+    {}
+
+    // Releases the calculator that was handed to the constructor.
+    ~IntelTopdown()
+    {
+        // deleting a null pointer is a no-op, so no explicit null check is needed
+        delete m_calculator;
+    }
 
-    public:
+public:
 
-        static const char* s_spec;
+    static const char* s_spec;
 
-        static void intel_topdown_register(Caliper * c, Channel * channel)
-        {
-            Level level = Top;
+    static void intel_topdown_register(Caliper* c, Channel* channel)
+    {
+        IntelTopdownLevel level = Top; // stays Top unless the "level" option is "all"
 
-            auto        config = services::init_config_from_spec(channel->config(), s_spec);
-            std::string lvlcfg = config.get("level").to_string();
+        auto        config = services::init_config_from_spec(channel->config(), s_spec);
+        std::string lvlcfg = config.get("level").to_string();
 
-            if (lvlcfg == "all") {
-                level = All;
-            } else if (lvlcfg != "top") {
-                Log(0).stream() << channel->name() << ": topdown: Unknown level \"" << lvlcfg << "\", skipping topdown"
-                                << std::endl;
-                return;
-            }
+        if (lvlcfg == "all") {
+            level = All;
+        } else if (lvlcfg != "top") {
+            Log(0).stream() << channel->name() << ": topdown: Unknown level \"" << lvlcfg << "\", skipping topdown"
+                            << std::endl;
+            return;
+        }
 
-            // TODO Add logic to select correct TopdownCalculator
-            TopdownCalculator* calculator = new HaswellTopdown(level);
+        TopdownCalculator* calculator; // owned here until it is handed to the IntelTopdown instance below
 
-            channel->config().set("CALI_PAPI_COUNTERS", calculator->get_counters());
+#if defined(CALIPER_HAVE_ARCH)
+        if (std::string(CALIPER_HAVE_ARCH) == "sapphirerapids") {
+            calculator = new SapphireRapidsTopdown(level);
+        } else {
+#endif
+            calculator = new HaswellTopdown(level); // Default type of calculation
+#if defined(CALIPER_HAVE_ARCH)
+        }
+#endif
 
-            if (!cali::services::register_service(c, channel, "papi")) {
-                Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
-                                << std::endl;
-                return;
-            }
+        channel->config().set("CALI_PAPI_COUNTERS", calculator->get_counters());
 
-            IntelTopdown* instance = new IntelTopdown(calculator);
-
-            channel->events().pre_flush_evt.connect([instance](Caliper* c, Channel* channel, SnapshotView) {
-                if (instance->find_counter_attrs(*c))
-                    instance->make_result_attrs(*c);
-                else
-                    Log(0).stream() << channel->name() << ": topdown: Could not find counter attributes!" << std::endl;
-            });
-            channel->events().postprocess_snapshot.connect([instance](Caliper*, Channel*, std::vector<Entry>& rec) {
-                instance->postprocess_snapshot_cb(rec);
-            });
-            channel->events().finish_evt.connect([instance](Caliper* c, Channel* channel) {
-                instance->finish_cb(c, channel);
-                delete instance;
-            });
-
-            Log(1).stream() << channel->name() << ": Registered topdown service. Level: " << lvlcfg << "." << std::endl;
+        if (!cali::services::register_service(c, channel, "papi")) {
+            Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
+                            << std::endl;
+            delete calculator; // no IntelTopdown instance owns it yet, so release it before bailing out
+            return;
         }
-    };
 
-    const char* IntelTopdown::s_spec = R"json(
+        IntelTopdown* instance = new IntelTopdown(calculator);
+        channel->events().pre_flush_evt.connect([instance](Caliper* c, Channel* channel, SnapshotView) {
+            if (instance->find_counter_attrs(*c))
+                instance->make_result_attrs(*c);
+            else
+                Log(0).stream() << channel->name() << ": topdown: Could not find counter attributes!" << std::endl;
+        });
+        channel->events().postprocess_snapshot.connect([instance](Caliper*, Channel*, std::vector<Entry>& rec) {
+            instance->postprocess_snapshot_cb(rec);
+        });
+        channel->events().finish_evt.connect([instance](Caliper* c, Channel* channel) {
+            instance->finish_cb(c, channel);
+            delete instance; // ~IntelTopdown() also deletes the calculator
+        });
+
+        Log(1).stream() << channel->name() << ": Registered topdown service. Level: " << lvlcfg << "." << std::endl;
+    }
+};
+
+const char* IntelTopdown::s_spec = R"json(
 {   "name": "topdown",
     "description": "Record PAPI counters and compute top-down analysis for Intel CPUs",
     "config": [
@@ -776,5 +813,5 @@ namespace cali
 
 {
 
-    CaliperService topdown_service { ::IntelTopdown::s_spec, ::IntelTopdown::intel_topdown_register };
+CaliperService topdown_service { ::IntelTopdown::s_spec, ::IntelTopdown::intel_topdown_register };
 }

From c09c473474f3b3ac70a98ed5edb700a5a440358c Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Fri, 4 Oct 2024 16:37:04 -0400
Subject: [PATCH 04/11] Splits TopdownCalculator and subclasses into own files
 to simplify implementation

---
 src/services/topdown/CMakeLists.txt           |   9 +-
 src/services/topdown/HaswellTopdown.cpp       | 249 +++++++
 src/services/topdown/HaswellTopdown.h         |  44 ++
 src/services/topdown/IntelTopdown.cpp         | 615 +-----------------
 .../topdown/SapphireRapidsTopdown.cpp         | 299 +++++++++
 src/services/topdown/SapphireRapidsTopdown.h  |  44 ++
 src/services/topdown/TopdownCalculator.cpp    |  90 +++
 src/services/topdown/TopdownCalculator.h      |  80 +++
 8 files changed, 823 insertions(+), 607 deletions(-)
 create mode 100644 src/services/topdown/HaswellTopdown.cpp
 create mode 100644 src/services/topdown/HaswellTopdown.h
 create mode 100644 src/services/topdown/SapphireRapidsTopdown.cpp
 create mode 100644 src/services/topdown/SapphireRapidsTopdown.h
 create mode 100644 src/services/topdown/TopdownCalculator.cpp
 create mode 100644 src/services/topdown/TopdownCalculator.h

diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt
index 7adbe68b..78a8bef5 100644
--- a/src/services/topdown/CMakeLists.txt
+++ b/src/services/topdown/CMakeLists.txt
@@ -1,5 +1,10 @@
 set(CALIPER_TOPDOWN_SOURCES
-  IntelTopdown.cpp)
+  IntelTopdown.cpp
+  TopdownCalculator.cpp
+  HaswellTopdown.cpp
+  SapphireRapidsTopdown.cpp)
 
-add_service_sources(${CALIPER_TOPDOWN_SOURCES})
+add_library(caliper-topdown OBJECT ${CALIPER_TOPDOWN_SOURCES})
+
+add_service_objlib("caliper-topdown")
 add_caliper_service("topdown CALIPER_HAVE_PAPI")
diff --git a/src/services/topdown/HaswellTopdown.cpp b/src/services/topdown/HaswellTopdown.cpp
new file mode 100644
index 00000000..a04551c8
--- /dev/null
+++ b/src/services/topdown/HaswellTopdown.cpp
@@ -0,0 +1,249 @@
+#include "HaswellTopdown.h"
+
+#include <algorithm>
+
+namespace cali {
+namespace topdown {
+
+HaswellTopdown::HaswellTopdown(IntelTopdownLevel level) // hands the level, counter lists and result names to the base class
+    : cali::topdown::TopdownCalculator(
+          level,
+          // top_counters: adjacent literals concatenate into one comma-separated string
+          "CPU_CLK_THREAD_UNHALTED:THREAD_P"
+          ",IDQ_UOPS_NOT_DELIVERED:CORE"
+          ",INT_MISC:RECOVERY_CYCLES"
+          ",UOPS_ISSUED:ANY"
+          ",UOPS_RETIRED:RETIRE_SLOTS",
+          // all_counters: same format; includes every counter read by the compute_* functions in this file
+          "BR_MISP_RETIRED:ALL_BRANCHES"
+          ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
+          ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
+          ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
+          ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
+          ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
+          ",IDQ_UOPS_NOT_DELIVERED:CORE"
+          ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
+          ",INT_MISC:RECOVERY_CYCLES"
+          ",MACHINE_CLEARS:COUNT"
+          ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
+          ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
+          ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
+          ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
+          ",UOPS_ISSUED:ANY"
+          ",UOPS_RETIRED:RETIRE_SLOTS",
+          // res_top: the four metric names emitted by compute_toplevel()
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
+          // res_all: res_top plus the names emitted by the backend/frontend/bad-speculation functions
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
+           "branch_mispredict", "machine_clears", "frontend_latency",
+           "frontend_bandwidth", "memory_bound", "core_bound", "ext_mem_bound",
+           "l1_bound", "l2_bound", "l3_bound"}) {}
+
+std::vector<Entry>
+HaswellTopdown::compute_toplevel(const std::vector<Entry> &rec) { // top-level metrics from the counters in rec
+  std::vector<Entry> ret;
+
+  Variant v_cpu_clk_unhalted_thread_p =
+      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+  Variant v_uops_retired_retire_slots =
+      get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+  Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
+  Variant v_int_misc_recovery_cycles =
+      get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
+  Variant v_idq_uops_not_delivered_core =
+      get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
+
+  bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() ||
+                       v_uops_retired_retire_slots.empty() ||
+                       v_uops_issued_any.empty() ||
+                       v_int_misc_recovery_cycles.empty() ||
+                       v_idq_uops_not_delivered_core.empty();
+  bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 && // every counter must be strictly positive
+                    v_uops_retired_retire_slots.to_double() > 0.0 &&
+                    v_uops_issued_any.to_double() > 0.0 &&
+                    v_int_misc_recovery_cycles.to_double() > 0.0 &&
+                    v_idq_uops_not_delivered_core.to_double() > 0.0;
+
+  double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double(); // slots = 4 x unhalted thread cycles
+
+  if (is_incomplete || !is_nonzero || slots < 1.0)
+    return ret; // empty result: nothing could be computed for this record
+
+  double retiring = v_uops_retired_retire_slots.to_double() / slots;
+  double bad_speculation =
+      (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double() +
+       4.0 * v_int_misc_recovery_cycles.to_double()) /
+      slots;
+  double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots;
+  double backend_bound = 1.0 - (retiring + bad_speculation + frontend_bound); // remainder after the other three
+
+  ret.reserve(4); // each value pushed below is clamped to be non-negative
+  ret.push_back(
+      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+  ret.push_back(Entry(m_result_attrs["backend_bound"],
+                      Variant(std::max(backend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["frontend_bound"],
+                      Variant(std::max(frontend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["bad_speculation"],
+                      Variant(std::max(bad_speculation, 0.0))));
+
+  return ret;
+}
+
+std::size_t HaswellTopdown::get_num_expected_toplevel() const { return 4; } // compute_toplevel() pushes 4 entries on success
+
+std::vector<Entry>
+HaswellTopdown::compute_retiring(const std::vector<Entry> &rec) { // rec is unused
+  return {}; // this calculator produces no retiring sub-metrics
+}
+
+std::size_t HaswellTopdown::get_num_expected_retiring() const { return 0; } // compute_retiring() always returns an empty vector
+
+std::vector<Entry>
+HaswellTopdown::compute_backend_bound(const std::vector<Entry> &rec) { // backend sub-metrics from the counters in rec
+  std::vector<Entry> ret;
+
+  Variant v_cpu_clk_unhalted_thread_p =
+      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+  Variant v_cycle_activity_stalls_ldm_pending =
+      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING");
+  Variant v_cycle_activity_cycles_no_execute =
+      get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE");
+  Variant v_uops_executed_core_cycles_ge_1 =
+      get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1");
+  Variant v_uops_executed_core_cycles_ge_2 =
+      get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2");
+  Variant v_mem_load_uops_retired_l3_miss =
+      get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS");
+  Variant v_mem_load_uops_retired_l3_hit =
+      get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT");
+  Variant v_cycle_activity_stalls_l2_pending =
+      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING");
+  Variant v_cycle_activity_stalls_l1d_pending =
+      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING");
+
+  bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() ||
+                       v_cycle_activity_stalls_ldm_pending.empty() ||
+                       v_cycle_activity_cycles_no_execute.empty() ||
+                       v_uops_executed_core_cycles_ge_1.empty() ||
+                       v_uops_executed_core_cycles_ge_2.empty() ||
+                       v_mem_load_uops_retired_l3_miss.empty() ||
+                       v_mem_load_uops_retired_l3_hit.empty() ||
+                       v_cycle_activity_stalls_l2_pending.empty() ||
+                       v_cycle_activity_stalls_l1d_pending.empty();
+
+  double clocks = v_cpu_clk_unhalted_thread_p.to_double();
+
+  if (is_incomplete || !(clocks > 1.0)) // proceed only with all counters present and clocks > 1.0
+    return ret;
+
+  double memory_bound =
+      v_cycle_activity_stalls_ldm_pending.to_double() / clocks;
+  double be_bound_at_exe = (v_cycle_activity_cycles_no_execute.to_double() +
+                            v_uops_executed_core_cycles_ge_1.to_double() -
+                            v_uops_executed_core_cycles_ge_2.to_double()) /
+                           clocks;
+  double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() + // misses are weighted by 7 (origin of the weight not shown here)
+                  7.0 * v_mem_load_uops_retired_l3_miss.to_double();
+  double l3_hit_fraction = 0.0;
+  double l3_miss_fraction = 0.0;
+  if (l3_tot > 0.0) { // both fractions stay 0 when l3_tot is not positive
+    l3_hit_fraction = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot;
+    l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot;
+  }
+  double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() *
+                         l3_miss_fraction / clocks;
+  double l1_bound = (v_cycle_activity_stalls_ldm_pending.to_double() -
+                     v_cycle_activity_stalls_l1d_pending.to_double()) /
+                    clocks;
+  double l2_bound = (v_cycle_activity_stalls_l1d_pending.to_double() -
+                     v_cycle_activity_stalls_l2_pending.to_double()) /
+                    clocks;
+  double l3_bound =
+      v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks;
+
+  ret.reserve(6); // unlike compute_toplevel(), these values are stored without clamping
+  ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound)));
+  ret.push_back(Entry(m_result_attrs["core_bound"],
+                      Variant(be_bound_at_exe - memory_bound)));
+  ret.push_back(Entry(m_result_attrs["ext_mem_bound"], Variant(ext_mem_bound)));
+  ret.push_back(Entry(m_result_attrs["l1_bound"], Variant(l1_bound)));
+  ret.push_back(Entry(m_result_attrs["l2_bound"], Variant(l2_bound)));
+  ret.push_back(Entry(m_result_attrs["l3_bound"], Variant(l3_bound)));
+
+  return ret;
+}
+
+std::size_t HaswellTopdown::get_num_expected_backend_bound() const { return 6; } // compute_backend_bound() pushes 6 entries on success
+
+std::vector<Entry>
+HaswellTopdown::compute_frontend_bound(const std::vector<Entry> &rec) { // frontend latency/bandwidth split from rec
+  std::vector<Entry> ret;
+
+  Variant v_cpu_clk_unhalted_thread_p =
+      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+  Variant v_idq_uops_not_delivered =
+      get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE");
+
+  bool is_incomplete =
+      v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty();
+
+  double clocks = v_cpu_clk_unhalted_thread_p.to_double();
+  double uops = v_idq_uops_not_delivered.to_double();
+
+  if (is_incomplete || clocks < 1.0 || uops > clocks) // uops > clocks would give a fraction above 1
+    return ret;
+
+  double fe_latency = uops / clocks;
+
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(fe_latency)));
+  ret.push_back( // bandwidth is reported as the complement of latency
+      Entry(m_result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency)));
+
+  return ret;
+}
+
+std::size_t HaswellTopdown::get_num_expected_frontend_bound() const {
+  return 2; // compute_frontend_bound() pushes 2 entries on success
+}
+
+std::vector<Entry>
+HaswellTopdown::compute_bad_speculation(const std::vector<Entry> &rec) { // mispredict/machine-clear split from rec
+  std::vector<Entry> ret;
+
+  Variant v_br_misp_retired_all_branches =
+      get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES");
+  Variant v_machine_clears_count =
+      get_val_from_rec(rec, "MACHINE_CLEARS:COUNT");
+
+  bool is_incomplete =
+      v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty();
+
+  double br_misp_retired_all_branches =
+      v_br_misp_retired_all_branches.to_double();
+  double machine_clears_count = v_machine_clears_count.to_double();
+
+  if (is_incomplete || // proceed only when both counters exist and their sum exceeds 1.0
+      !(br_misp_retired_all_branches + machine_clears_count > 1.0))
+    return ret;
+
+  double branch_mispredict = // share of mispredicts among mispredicts + machine clears
+      br_misp_retired_all_branches /
+      (br_misp_retired_all_branches + machine_clears_count);
+
+  ret.reserve(2);
+  ret.push_back(
+      Entry(m_result_attrs["branch_mispredict"], Variant(branch_mispredict)));
+  ret.push_back(Entry(m_result_attrs["machine_clears"], // complement of branch_mispredict
+                      Variant(1.0 - branch_mispredict)));
+
+  return ret;
+}
+
+std::size_t HaswellTopdown::get_num_expected_bad_speculation() const {
+  return 2; // compute_bad_speculation() pushes 2 entries on success
+}
+
+} // namespace topdown
+} // namespace cali
\ No newline at end of file
diff --git a/src/services/topdown/HaswellTopdown.h b/src/services/topdown/HaswellTopdown.h
new file mode 100644
index 00000000..39622d89
--- /dev/null
+++ b/src/services/topdown/HaswellTopdown.h
@@ -0,0 +1,44 @@
+#ifndef CALI_TOPDOWN_HASWELL_TOPDOWN_H
+#define CALI_TOPDOWN_HASWELL_TOPDOWN_H
+
+#include "TopdownCalculator.h"
+
+namespace cali {
+namespace topdown {
+// Haswell topdown calculator; IntelTopdown's default calculation.
+class HaswellTopdown : public TopdownCalculator {
+public:
+  HaswellTopdown(IntelTopdownLevel level);
+
+  virtual ~HaswellTopdown() = default;
+  // compute_X() builds entries from rec; get_num_expected_X() is their count.
+  virtual std::vector<Entry>
+  compute_toplevel(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_toplevel() const override;
+
+  virtual std::vector<Entry>
+  compute_retiring(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_retiring() const override;
+
+  virtual std::vector<Entry>
+  compute_backend_bound(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_backend_bound() const override;
+
+  virtual std::vector<Entry>
+  compute_frontend_bound(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_frontend_bound() const override;
+
+  virtual std::vector<Entry>
+  compute_bad_speculation(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_bad_speculation() const override;
+};
+
+} // namespace topdown
+} // namespace cali
+
+#endif /* CALI_TOPDOWN_HASWELL_TOPDOWN_H */
\ No newline at end of file
diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp
index 1087c10e..507a7648 100644
--- a/src/services/topdown/IntelTopdown.cpp
+++ b/src/services/topdown/IntelTopdown.cpp
@@ -8,7 +8,9 @@
 
 #include "../Services.h"
 
-#include "caliper/Caliper.h"
+#include "HaswellTopdown.h"
+#include "SapphireRapidsTopdown.h"
+
 #include "caliper/SnapshotRecord.h"
 
 #include "caliper/common/Log.h"
@@ -18,608 +20,13 @@
 #include "../Services.h"
 
 #include <algorithm>
-#include <map>
 #include <sstream>
-#include <vector>
 
 using namespace cali;
 
 namespace
 {
 
-enum IntelTopdownLevel { All = 1, Top = 2 };
-
-class TopdownCalculator
-{
-protected:
-
-    IntelTopdownLevel m_level;
-
-    const char* m_top_counters;
-    const char* m_all_counters;
-
-    std::vector<const char*> m_res_top;
-    std::vector<const char*> m_res_all;
-
-    std::map<std::string, Attribute> m_counter_attrs;
-    std::map<std::string, Attribute> m_result_attrs;
-
-    std::map<std::string, int> m_counters_not_found;
-
-    Variant get_val_from_rec(const std::vector<Entry>& rec, const char* name)
-    {
-        Variant ret;
-
-        auto c_it = m_counter_attrs.find(name);
-        if (c_it == m_counter_attrs.end())
-            return ret;
-
-        cali_id_t attr_id = c_it->second.id();
-
-        auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry& e) { return e.attribute() == attr_id; });
-
-        if (it != rec.end())
-            ret = it->value();
-        else
-            ++m_counters_not_found[std::string(name)];
-
-        return ret;
-    }
-
-    TopdownCalculator(
-        IntelTopdownLevel          level,
-        const char*                top_counters,
-        const char*                all_counters,
-        std::vector<const char*>&& res_top,
-        std::vector<const char*>&& res_all
-    )
-        : m_level(level),
-          m_top_counters(top_counters),
-          m_all_counters(all_counters),
-          m_res_top(res_top),
-          m_res_all(res_all)
-    {}
-
-public:
-
-    TopdownCalculator(IntelTopdownLevel level) : m_level(level) {}
-
-    virtual ~TopdownCalculator() = default;
-
-    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) = 0;
-
-    virtual std::size_t get_num_expected_toplevel() const = 0;
-
-    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) = 0;
-
-    virtual std::size_t get_num_expected_retiring() const = 0;
-
-    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) = 0;
-
-    virtual std::size_t get_num_expected_backend_bound() const = 0;
-
-    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) = 0;
-
-    virtual std::size_t get_num_expected_frontend_bound() const = 0;
-
-    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) = 0;
-
-    virtual std::size_t get_num_expected_bad_speculation() const = 0;
-
-    bool find_counter_attrs(CaliperMetadataAccessInterface& db)
-    {
-        const char* list     = (m_level == All ? m_all_counters : m_top_counters);
-        auto        counters = StringConverter(list).to_stringlist();
-
-        for (const auto& s : counters) {
-            Attribute attr = db.get_attribute(std::string("sum#papi.") + s);
-
-            if (!attr)
-                attr = db.get_attribute(std::string("papi.") + s);
-            if (!attr) {
-                Log(0).stream() << "topdown: " << s << " counter attribute not found!" << std::endl;
-                return false;
-            }
-
-            m_counter_attrs[s] = attr;
-        }
-
-        return true;
-    }
-
-    void make_result_attrs(CaliperMetadataAccessInterface& db)
-    {
-        std::vector<const char*>& res = (m_level == Top ? m_res_top : m_res_all);
-
-        for (const char* s : res) {
-            m_result_attrs[std::string(s)] = db.create_attribute(
-                std::string("topdown.") + s,
-                CALI_TYPE_DOUBLE,
-                CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS
-            );
-        }
-    }
-
-    const std::map<std::string, int>& get_counters_not_found() const { return m_counters_not_found; }
-
-    const char* get_counters() const
-    {
-        if (m_level == All) {
-            return m_all_counters;
-        } else {
-            return m_top_counters;
-        }
-    }
-
-    IntelTopdownLevel get_level() const { return m_level; }
-};
-
-class HaswellTopdown : public TopdownCalculator
-{
-public:
-
-    HaswellTopdown(IntelTopdownLevel level)
-        : TopdownCalculator(
-            level,
-            // top_counters
-            "CPU_CLK_THREAD_UNHALTED:THREAD_P"
-            ",IDQ_UOPS_NOT_DELIVERED:CORE"
-            ",INT_MISC:RECOVERY_CYCLES"
-            ",UOPS_ISSUED:ANY"
-            ",UOPS_RETIRED:RETIRE_SLOTS",
-            // all_counters
-            "BR_MISP_RETIRED:ALL_BRANCHES"
-            ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
-            ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
-            ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
-            ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
-            ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
-            ",IDQ_UOPS_NOT_DELIVERED:CORE"
-            ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
-            ",INT_MISC:RECOVERY_CYCLES"
-            ",MACHINE_CLEARS:COUNT"
-            ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
-            ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
-            ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
-            ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
-            ",UOPS_ISSUED:ANY"
-            ",UOPS_RETIRED:RETIRE_SLOTS",
-            // res_top
-            { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
-            // res_all
-            { "retiring",
-              "backend_bound",
-              "frontend_bound",
-              "bad_speculation",
-              "branch_mispredict",
-              "machine_clears",
-              "frontend_latency",
-              "frontend_bandwidth",
-              "memory_bound",
-              "core_bound",
-              "ext_mem_bound",
-              "l1_bound",
-              "l2_bound",
-              "l3_bound" }
-        )
-    {}
-
-    virtual ~HaswellTopdown() = default;
-
-    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        Variant v_cpu_clk_unhalted_thread_p   = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-        Variant v_uops_retired_retire_slots   = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
-        Variant v_uops_issued_any             = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
-        Variant v_int_misc_recovery_cycles    = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
-        Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
-
-        bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_uops_retired_retire_slots.empty()
-                             || v_uops_issued_any.empty() || v_int_misc_recovery_cycles.empty()
-                             || v_idq_uops_not_delivered_core.empty();
-        bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 && v_uops_retired_retire_slots.to_double() > 0.0
-                          && v_uops_issued_any.to_double() > 0.0 && v_int_misc_recovery_cycles.to_double() > 0.0
-                          && v_idq_uops_not_delivered_core.to_double() > 0.0;
-
-        double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double();
-
-        if (is_incomplete || !is_nonzero || slots < 1.0)
-            return ret;
-
-        double retiring        = v_uops_retired_retire_slots.to_double() / slots;
-        double bad_speculation = (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double()
-                                  + 4.0 * v_int_misc_recovery_cycles.to_double())
-                                 / slots;
-        double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots;
-        double backend_bound  = 1.0 - (retiring + bad_speculation + frontend_bound);
-
-        ret.reserve(4);
-        ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-        ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
-        ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
-        ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_toplevel() const override { return 4; }
-
-    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override { return {}; }
-
-    virtual std::size_t get_num_expected_retiring() const override { return 0; }
-
-    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        Variant v_cpu_clk_unhalted_thread_p         = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-        Variant v_cycle_activity_stalls_ldm_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING");
-        Variant v_cycle_activity_cycles_no_execute  = get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE");
-        Variant v_uops_executed_core_cycles_ge_1    = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1");
-        Variant v_uops_executed_core_cycles_ge_2    = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2");
-        Variant v_mem_load_uops_retired_l3_miss     = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS");
-        Variant v_mem_load_uops_retired_l3_hit      = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT");
-        Variant v_cycle_activity_stalls_l2_pending  = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING");
-        Variant v_cycle_activity_stalls_l1d_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING");
-
-        bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_cycle_activity_stalls_ldm_pending.empty()
-                             || v_cycle_activity_cycles_no_execute.empty() || v_uops_executed_core_cycles_ge_1.empty()
-                             || v_uops_executed_core_cycles_ge_2.empty() || v_mem_load_uops_retired_l3_miss.empty()
-                             || v_mem_load_uops_retired_l3_hit.empty() || v_cycle_activity_stalls_l2_pending.empty()
-                             || v_cycle_activity_stalls_l1d_pending.empty();
-
-        double clocks = v_cpu_clk_unhalted_thread_p.to_double();
-
-        if (is_incomplete || !(clocks > 1.0))
-            return ret;
-
-        double memory_bound = v_cycle_activity_stalls_ldm_pending.to_double() / clocks;
-        double be_bound_at_exe =
-            (v_cycle_activity_cycles_no_execute.to_double() + v_uops_executed_core_cycles_ge_1.to_double()
-             - v_uops_executed_core_cycles_ge_2.to_double())
-            / clocks;
-        double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() + 7.0 * v_mem_load_uops_retired_l3_miss.to_double();
-        double l3_hit_fraction  = 0.0;
-        double l3_miss_fraction = 0.0;
-        if (l3_tot > 0.0) {
-            l3_hit_fraction  = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot;
-            l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot;
-        }
-        double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_miss_fraction / clocks;
-        double l1_bound =
-            (v_cycle_activity_stalls_ldm_pending.to_double() - v_cycle_activity_stalls_l1d_pending.to_double())
-            / clocks;
-        double l2_bound =
-            (v_cycle_activity_stalls_l1d_pending.to_double() - v_cycle_activity_stalls_l2_pending.to_double()) / clocks;
-        double l3_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks;
-
-        ret.reserve(6);
-        ret.push_back(Entry(result_attrs["memory_bound"], Variant(memory_bound)));
-        ret.push_back(Entry(result_attrs["core_bound"], Variant(be_bound_at_exe - memory_bound)));
-        ret.push_back(Entry(result_attrs["ext_mem_bound"], Variant(ext_mem_bound)));
-        ret.push_back(Entry(result_attrs["l1_bound"], Variant(l1_bound)));
-        ret.push_back(Entry(result_attrs["l2_bound"], Variant(l2_bound)));
-        ret.push_back(Entry(result_attrs["l3_bound"], Variant(l3_bound)));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_backend_bound() const override { return 6; }
-
-    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        Variant v_cpu_clk_unhalted_thread_p = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-        Variant v_idq_uops_not_delivered    = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE");
-
-        bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty();
-
-        double clocks = v_cpu_clk_unhalted_thread_p.to_double();
-        double uops   = v_idq_uops_not_delivered.to_double();
-
-        if (is_incomplete || clocks < 1.0 || uops > clocks)
-            return ret;
-
-        double fe_latency = uops / clocks;
-
-        ret.reserve(2);
-        ret.push_back(Entry(result_attrs["frontend_latency"], Variant(fe_latency)));
-        ret.push_back(Entry(result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency)));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_frontend_bound() const override { return 2; }
-
-    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        Variant v_br_misp_retired_all_branches = get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES");
-        Variant v_machine_clears_count         = get_val_from_rec(rec, "MACHINE_CLEARS:COUNT");
-
-        bool is_incomplete = v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty();
-
-        double br_misp_retired_all_branches = v_br_misp_retired_all_branches.to_double();
-        double machine_clears_count         = v_machine_clears_count.to_double();
-
-        if (is_incomplete || !(br_misp_retired_all_branches + machine_clears_count > 1.0))
-            return ret;
-
-        double branch_mispredict = br_misp_retired_all_branches / (br_misp_retired_all_branches + machine_clears_count);
-
-        ret.reserve(2);
-        ret.push_back(Entry(result_attrs["branch_mispredict"], Variant(branch_mispredict)));
-        ret.push_back(Entry(result_attrs["machine_clears"], Variant(1.0 - branch_mispredict)));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_bad_speculation() const override { return 2; }
-};
-
-class SapphireRapidsTopdown : public TopdownCalculator
-{
-public:
-
-    SapphireRapidsTopdown(IntelTopdownLevel level)
-        : TopdownCalculator(
-            level,
-            // top_counters
-            "perf::slots"
-            ",perf::topdown-retiring"
-            ",perf::topdown-bad-spec"
-            ",perf::topdown-fe-bound"
-            ",perf::topdown-be-bound"
-            ",INT_MISC:UOP_DROPPING",
-            // all_counters
-            "perf::slots"
-            ",perf::topdown-retiring"
-            ",perf::topdown-bad-spec"
-            ",perf::topdown-fe-bound"
-            ",perf::topdown-be-bound"
-            ",INT_MISC:UOP_DROPPING"
-            ",perf_raw::r8400"  // topdown-heavy-ops
-            ",perf_raw::r8500"  // topdown-br-mispredict
-            ",perf_raw::r8600"  // topdown-fetch-lat
-            ",perf_raw::r8700", // topdown-mem-bound
-            // res_top
-            { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
-            // res_all
-            { "retiring",
-              "backend_bound",
-              "frontend_bound",
-              "bad_speculation",
-              "branch_mispredict",
-              "machine_clears",
-              "frontend_latency",
-              "frontend_bandwidth",
-              "memory_bound",
-              "core_bound",
-              "light_ops",
-              "heavy_ops" }
-        )
-    {}
-
-    virtual ~SapphireRapidsTopdown() = default;
-
-    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        // Get PAPI metrics for toplevel calculations
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
-        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
-        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
-        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-
-        // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
-                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty();
-        // Check if all Variants are greater than 0 when casted to doubles (use
-        // .to_double())
-        bool is_nonzero = v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && v_bad_spec.to_double() > 0.0
-                          && v_retiring.to_double() > 0.0 && v_int_misc_uop_dropping.to_double() > 0.0
-                          && v_slots_or_info_thread_slots.to_double() > 0.0;
-
-        // Check if bad values were obtained
-        if (is_incomplete || !is_nonzero)
-            return ret;
-
-        // Perform toplevel calcs
-        double toplevel_sum =
-            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
-
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
-                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
-        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
-
-        // Add toplevel metrics to vector of Entry
-        ret.reserve(4);
-        ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-        ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
-        ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
-        ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_toplevel() const override { return 4; }
-
-    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        // Get PAPI metrics for toplevel calculations
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
-        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
-        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
-        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_heavy_ops                  = get_val_from_rec(rec, "perf_raw::r8400");
-
-        // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
-                             || v_slots_or_info_thread_slots.empty() || v_heavy_ops.empty();
-
-        // Check if bad values were obtained
-        if (is_incomplete)
-            return ret;
-
-        double toplevel_sum =
-            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
-        // Copied from compute_toplevel
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-
-        double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double light_ops = std::max(0.0, retiring - heavy_ops);
-
-        // Add toplevel metrics to vector of Entry
-        ret.reserve(2);
-        ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
-        ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_retiring() const override { return 2; }
-
-    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        // Get PAPI metrics for toplevel calculations
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
-        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
-        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
-        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_memory_bound               = get_val_from_rec(rec, "perf_raw::r8700");
-
-        // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
-                             || v_slots_or_info_thread_slots.empty() || v_memory_bound.empty();
-
-        // Check if bad values were obtained
-        if (is_incomplete)
-            return ret;
-
-        double toplevel_sum =
-            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
-        // Copied from compute_toplevel
-        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-
-        double memory_bound =
-            (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double core_bound = std::max(0.0, backend_bound - memory_bound);
-
-        // Add toplevel metrics to vector of Entry
-        ret.reserve(2);
-        ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0))));
-        ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_backend_bound() const override { return 2; }
-
-    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        // Get PAPI metrics for toplevel calculations
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
-        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
-        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
-        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-        Variant v_fetch_latency              = get_val_from_rec(rec, "perf_raw::r8600");
-
-        // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
-                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
-                             || v_fetch_latency.empty();
-
-        // Check if bad values were obtained
-        if (is_incomplete)
-            return ret;
-
-        double toplevel_sum =
-            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
-        // Copied from compute_toplevel
-        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
-                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
-
-        double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum)
-                               - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
-
-        double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
-
-        // Add toplevel metrics to vector of Entry
-        ret.reserve(2);
-        ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0))));
-        ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_frontend_bound() const override { return 2; }
-
-    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override
-    {
-        std::vector<Entry> ret;
-
-        // Get PAPI metrics for toplevel calculations
-        Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-        Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
-        Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
-        Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
-        Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
-        Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-        Variant v_branch_mispredict          = get_val_from_rec(rec, "perf_raw::r8500");
-
-        // Check if any Variant is empty (use .empty())
-        bool is_incomplete = v_fe_bound.empty() || v_be_bound.emtpy() || v_bad_spec.empty() || v_retiring.empty()
-                             || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
-                             || v_branch_mispredict.empty();
-
-        // Check if bad values were obtained
-        if (is_incomplete)
-            return ret;
-
-        // Perform toplevel calcs
-        double toplevel_sum =
-            (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
-
-        double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
-                                - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
-        double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
-
-        double branch_mispredict =
-            (v_branch_mispredict.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
-        double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
-
-        // Add toplevel metrics to vector of Entry
-        ret.reserve(2);
-        ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0))));
-        ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(machine_clears, 0.0))));
-
-        return ret;
-    }
-
-    virtual std::size_t get_num_expected_bad_speculation() const override { return 2; }
-};
-
 class IntelTopdown
 {
     unsigned num_top_computed;
@@ -633,9 +40,9 @@ class IntelTopdown
     unsigned num_ret_computed;
     unsigned num_ret_skipped;
 
-    IntelTopdownLevel m_level;
+    cali::topdown::IntelTopdownLevel m_level;
 
-    TopdownCalculator* m_calculator;
+    cali::topdown::TopdownCalculator* m_calculator;
 
     bool find_counter_attrs(CaliperMetadataAccessInterface& db) { return m_calculator->find_counter_attrs(db); }
 
@@ -715,7 +122,7 @@ class IntelTopdown
         }
     }
 
-    explicit IntelTopdown(TopdownCalculator* calculator)
+    explicit IntelTopdown(cali::topdown::TopdownCalculator* calculator)
         : num_top_computed(0),
           num_top_skipped(0),
           num_be_computed(0),
@@ -737,11 +144,9 @@ class IntelTopdown
 
 public:
 
-    static const char* s_spec;
-
     static void intel_topdown_register(Caliper* c, Channel* channel)
     {
-        IntelTopdownLevel level = Top;
+        cali::topdown::IntelTopdownLevel level = Top;
 
         auto        config = services::init_config_from_spec(channel->config(), s_spec);
         std::string lvlcfg = config.get("level").to_string();
@@ -754,14 +159,14 @@ class IntelTopdown
             return;
         }
 
-        TopdownCalculator* calculator;
+        cali::topdown::TopdownCalculator* calculator;
 
 #if defined(CALIPER_HAVE_ARCH)
         if (std::string(CALIPER_HAVE_ARCH) == "sapphirerapids") {
-            calculator = new SapphireRapidsTopdown(level);
+            calculator = new cali::topdown::SapphireRapidsTopdown(level);
         } else {
 #endif
-            calculator = new HaswellTopdown(level); // Default type of calculation
+            calculator = new cali::topdown::HaswellTopdown(level); // Default type of calculation
 #if defined(CALIPER_HAVE_ARCH)
         }
 #endif
diff --git a/src/services/topdown/SapphireRapidsTopdown.cpp b/src/services/topdown/SapphireRapidsTopdown.cpp
new file mode 100644
index 00000000..457489b3
--- /dev/null
+++ b/src/services/topdown/SapphireRapidsTopdown.cpp
@@ -0,0 +1,299 @@
+#include "SapphireRapidsTopdown.h"
+
+#include <algorithm>
+
+namespace cali {
+namespace topdown {
+
+SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
+    : cali::topdown::TopdownCalculator(
+          level,
+          // top_counters
+          "perf::slots"
+          ",perf::topdown-retiring"
+          ",perf::topdown-bad-spec"
+          ",perf::topdown-fe-bound"
+          ",perf::topdown-be-bound"
+          ",INT_MISC:UOP_DROPPING",
+          // all_counters
+          "perf::slots"
+          ",perf::topdown-retiring"
+          ",perf::topdown-bad-spec"
+          ",perf::topdown-fe-bound"
+          ",perf::topdown-be-bound"
+          ",INT_MISC:UOP_DROPPING"
+          ",perf_raw::r8400"  // topdown-heavy-ops
+          ",perf_raw::r8500"  // topdown-br-mispredict
+          ",perf_raw::r8600"  // topdown-fetch-lat
+          ",perf_raw::r8700", // topdown-mem-bound
+          // res_top
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
+          // res_all
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
+           "branch_mispredict", "machine_clears", "frontend_latency",
+           "frontend_bandwidth", "memory_bound", "core_bound", "light_ops",
+           "heavy_ops"}) {}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_int_misc_uop_dropping.empty() ||
+                       v_slots_or_info_thread_slots.empty();
+  // Check if all Variants are greater than 0 when casted to doubles (use
+  // .to_double())
+  bool is_nonzero =
+      v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
+      v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
+      v_int_misc_uop_dropping.to_double() > 0.0 &&
+      v_slots_or_info_thread_slots.to_double() > 0.0;
+
+  // Check if bad values were obtained
+  if (is_incomplete || !is_nonzero)
+    return ret;
+
+  // Perform toplevel calcs
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(4);
+  ret.push_back(
+      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+  ret.push_back(Entry(m_result_attrs["backend_bound"],
+                      Variant(std::max(backend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["frontend_bound"],
+                      Variant(std::max(frontend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["bad_speculation"],
+                      Variant(std::max(bad_speculation, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const {
+  return 4;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_slots_or_info_thread_slots.empty() ||
+                       v_heavy_ops.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+
+  double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
+                     (0 * v_slots_or_info_thread_slots.to_double());
+  double light_ops = std::max(0.0, retiring - heavy_ops);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(
+      Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
+  ret.push_back(
+      Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_slots_or_info_thread_slots.empty() ||
+                       v_memory_bound.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+
+  double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
+                        (0 * v_slots_or_info_thread_slots.to_double());
+  double core_bound = std::max(0.0, backend_bound - memory_bound);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["memory_bound"],
+                      Variant(std::max(memory_bound, 0.0))));
+  ret.push_back(
+      Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete =
+      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
+      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
+      v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+
+  double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
+                         (v_int_misc_uop_dropping.to_double() /
+                          v_slots_or_info_thread_slots.to_double());
+
+  double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["frontend_latency"],
+                      Variant(std::max(fetch_latency, 0.0))));
+  ret.push_back(Entry(m_result_attrs["frontend_bandwidth"],
+                      Variant(std::max(fetch_bandwidth, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete =
+      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
+      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
+      v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  // Perform toplevel calcs
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+  double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
+                             (0 * v_slots_or_info_thread_slots.to_double());
+  double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["branch_mispredict"],
+                      Variant(std::max(branch_mispredict, 0.0))));
+  ret.push_back(Entry(m_result_attrs["machine_clears"],
+                      Variant(std::max(machine_clears, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const {
+  return 2;
+}
+
+} // namespace topdown
+} // namespace cali
\ No newline at end of file
diff --git a/src/services/topdown/SapphireRapidsTopdown.h b/src/services/topdown/SapphireRapidsTopdown.h
new file mode 100644
index 00000000..5038305b
--- /dev/null
+++ b/src/services/topdown/SapphireRapidsTopdown.h
@@ -0,0 +1,44 @@
+#ifndef CALI_TOPDOWN_SAPPHIRE_RAPIDS_TOPDOWN_H
+#define CALI_TOPDOWN_SAPPHIRE_RAPIDS_TOPDOWN_H
+
+#include "TopdownCalculator.h"
+
+namespace cali {
+namespace topdown {
+
+class SapphireRapidsTopdown : public TopdownCalculator {
+public:
+  SapphireRapidsTopdown(IntelTopdownLevel level);
+
+  virtual ~SapphireRapidsTopdown() = default;
+
+  virtual std::vector<Entry>
+  compute_toplevel(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_toplevel() const override;
+
+  virtual std::vector<Entry>
+  compute_retiring(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_retiring() const override;
+
+  virtual std::vector<Entry>
+  compute_backend_bound(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_backend_bound() const override;
+
+  virtual std::vector<Entry>
+  compute_frontend_bound(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_frontend_bound() const override;
+
+  virtual std::vector<Entry>
+  compute_bad_speculation(const std::vector<Entry> &rec) override;
+
+  virtual std::size_t get_num_expected_bad_speculation() const override;
+};
+
+} // namespace topdown
+} // namespace cali
+
+#endif /* CALI_TOPDOWN_SAPPHIRE_RAPIDS_TOPDOWN_H */
\ No newline at end of file
diff --git a/src/services/topdown/TopdownCalculator.cpp b/src/services/topdown/TopdownCalculator.cpp
new file mode 100644
index 00000000..d1c8909b
--- /dev/null
+++ b/src/services/topdown/TopdownCalculator.cpp
@@ -0,0 +1,90 @@
+#include "TopdownCalculator.h"
+
+#include "caliper/common/Log.h"
+
+#include <algorithm>
+
+namespace cali {
+namespace topdown {
+
+Variant TopdownCalculator::get_val_from_rec(const std::vector<Entry> &rec,
+                                            const char *name) {
+  Variant ret;
+
+  auto c_it = m_counter_attrs.find(name);
+  if (c_it == m_counter_attrs.end())
+    return ret;
+
+  cali_id_t attr_id = c_it->second.id();
+
+  auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry &e) {
+    return e.attribute() == attr_id;
+  });
+
+  if (it != rec.end())
+    ret = it->value();
+  else
+    ++m_counters_not_found[std::string(name)];
+
+  return ret;
+}
+
+TopdownCalculator::TopdownCalculator(IntelTopdownLevel level,
+                                     const char *top_counters,
+                                     const char *all_counters,
+                                     std::vector<const char *> &&res_top,
+                                     std::vector<const char *> &&res_all)
+    : m_level(level), m_top_counters(top_counters),
+      m_all_counters(all_counters), m_res_top(res_top), m_res_all(res_all) {}
+
+TopdownCalculator::TopdownCalculator(IntelTopdownLevel level) // level-only ctor: no counters configured
+    : m_level(level), m_top_counters(""), m_all_counters("") {} // empty lists instead of indeterminate pointers
+
+bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface &db) {
+  const char *list = (m_level == All ? m_all_counters : m_top_counters);
+  auto counters = StringConverter(list).to_stringlist();
+
+  for (const auto &s : counters) {
+    Attribute attr = db.get_attribute(std::string("sum#papi.") + s);
+
+    if (attr == Attribute::invalid)
+      attr = db.get_attribute(std::string("papi.") + s);
+    if (attr == Attribute::invalid) {
+      Log(0).stream() << "topdown: " << s << " counter attribute not found!"
+                      << std::endl;
+      return false;
+    }
+
+    m_counter_attrs[s] = attr;
+  }
+
+  return true;
+}
+
+void TopdownCalculator::make_result_attrs(CaliperMetadataAccessInterface &db) {
+  std::vector<const char *> &res = (m_level == Top ? m_res_top : m_res_all);
+
+  for (const char *s : res) {
+    m_result_attrs[std::string(s)] =
+        db.create_attribute(std::string("topdown.") + s, CALI_TYPE_DOUBLE,
+                            CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS);
+  }
+}
+
+const std::map<std::string, int> &
+TopdownCalculator::get_counters_not_found() const {
+  return m_counters_not_found;
+}
+
+const char *TopdownCalculator::get_counters() const {
+  if (m_level == All) {
+    return m_all_counters;
+  } else {
+    return m_top_counters;
+  }
+}
+
+IntelTopdownLevel TopdownCalculator::get_level() const { return m_level; }
+
+} // namespace topdown
+} // namespace cali
\ No newline at end of file
diff --git a/src/services/topdown/TopdownCalculator.h b/src/services/topdown/TopdownCalculator.h
new file mode 100644
index 00000000..eb40043a
--- /dev/null
+++ b/src/services/topdown/TopdownCalculator.h
@@ -0,0 +1,80 @@
+#ifndef CALI_TOPDOWN_TOPDOWN_CALCULATOR_H
+#define CALI_TOPDOWN_TOPDOWN_CALCULATOR_H
+
+#include "caliper/Caliper.h"
+
+#include <map>
+#include <vector>
+
+namespace cali {
+namespace topdown {
+
+enum IntelTopdownLevel { All = 1, Top = 2 };
+
+class TopdownCalculator {
+protected:
+  IntelTopdownLevel m_level;
+
+  const char *m_top_counters;
+  const char *m_all_counters;
+
+  std::vector<const char *> m_res_top;
+  std::vector<const char *> m_res_all;
+
+  std::map<std::string, Attribute> m_counter_attrs;
+  std::map<std::string, Attribute> m_result_attrs;
+
+  std::map<std::string, int> m_counters_not_found;
+
+  Variant get_val_from_rec(const std::vector<Entry> &rec, const char *name);
+
+  TopdownCalculator(IntelTopdownLevel level, const char *top_counters,
+                    const char *all_counters,
+                    std::vector<const char *> &&res_top,
+                    std::vector<const char *> &&res_all);
+
+public:
+  TopdownCalculator(IntelTopdownLevel level);
+
+  virtual ~TopdownCalculator() = default;
+
+  virtual std::vector<Entry>
+  compute_toplevel(const std::vector<Entry> &rec) = 0;
+
+  virtual std::size_t get_num_expected_toplevel() const = 0;
+
+  virtual std::vector<Entry>
+  compute_retiring(const std::vector<Entry> &rec) = 0;
+
+  virtual std::size_t get_num_expected_retiring() const = 0;
+
+  virtual std::vector<Entry>
+  compute_backend_bound(const std::vector<Entry> &rec) = 0;
+
+  virtual std::size_t get_num_expected_backend_bound() const = 0;
+
+  virtual std::vector<Entry>
+  compute_frontend_bound(const std::vector<Entry> &rec) = 0;
+
+  virtual std::size_t get_num_expected_frontend_bound() const = 0;
+
+  virtual std::vector<Entry>
+  compute_bad_speculation(const std::vector<Entry> &rec) = 0;
+
+  virtual std::size_t get_num_expected_bad_speculation() const = 0;
+
+  bool find_counter_attrs(CaliperMetadataAccessInterface &db);
+
+  void make_result_attrs(CaliperMetadataAccessInterface &db);
+
+  const std::map<std::string, int> &get_counters_not_found() const;
+
+  const char *get_counters() const;
+
+  IntelTopdownLevel get_level() const;
+};
+
+} // namespace topdown
+} // namespace cali
+
+#endif /* CALI_TOPDOWN_TOPDOWN_CALCULATOR_H */
\ No newline at end of file

From ea6d73eea6abc561dacec43ed13c1c7c188e0105 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Fri, 4 Oct 2024 16:54:24 -0400
Subject: [PATCH 05/11] Adds a 'disable_multiplexing' configuration to the Papi
 service and use that configuration in the topdown service

---
 src/services/papi/Papi.cpp                    | 46 +++++++++++--------
 src/services/topdown/HaswellTopdown.cpp       |  2 +
 src/services/topdown/HaswellTopdown.h         |  2 +
 src/services/topdown/IntelTopdown.cpp         |  4 ++
 .../topdown/SapphireRapidsTopdown.cpp         |  4 ++
 src/services/topdown/SapphireRapidsTopdown.h  |  2 +
 src/services/topdown/TopdownCalculator.h      |  2 +
 7 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/services/papi/Papi.cpp b/src/services/papi/Papi.cpp
index ac3f0d6d..eb111af3 100644
--- a/src/services/papi/Papi.cpp
+++ b/src/services/papi/Papi.cpp
@@ -64,6 +64,7 @@ class PapiService
     Attribute m_thread_attr;
 
     bool m_enable_multiplex;
+    bool m_disable_multiplex;
 
     unsigned m_num_eventsets;
     unsigned m_num_event_mismatch;
@@ -169,21 +170,20 @@ class PapiService
 
             int num = static_cast<int>(p.second->codes.size());
 
-            // if (cpi && (num > 4 /* magic number for Intel counter support :-( */ ||
-            // m_enable_multiplex)) {
-            //     if (Log::verbosity() >= 2)
-            //         Log(2).stream() << "papi: Initializing multiplex support for
-            //         component "
-            //                         << p.first << " (" << cpi->name << ")"
-            //                         << std::endl;
-
-            //     ret = PAPI_assign_eventset_component(eventset, p.first);
-            //     if (ret != PAPI_OK)
-            //         print_papi_error("PAPI_assign_eventset_component", ret);
-            //     ret = PAPI_set_multiplex(eventset);
-            //     if (ret != PAPI_OK)
-            //         print_papi_error("PAPI_set_multiplex", ret);
-            // }
+            if (!m_disable_multiplex && cpi // the disable flag gates both triggers below
+                && (num > 4 /* magic number for Intel counter support :-( */ || m_enable_multiplex)) {
+                if (Log::verbosity() >= 2)
+                    Log(2).stream() << "papi: Initializing multiplex support for "
+                                       "component "
+                                    << p.first << " (" << cpi->name << ")" << std::endl;
+
+                ret = PAPI_assign_eventset_component(eventset, p.first); // bind eventset to this component first
+                if (ret != PAPI_OK)
+                    print_papi_error("PAPI_assign_eventset_component", ret);
+                ret = PAPI_set_multiplex(eventset);
+                if (ret != PAPI_OK)
+                    print_papi_error("PAPI_set_multiplex", ret);
+            }
 
             ret = PAPI_add_events(eventset, p.second->codes.data(), num);
             if (ret < 0) {
@@ -370,6 +370,7 @@ class PapiService
 
     PapiService(Caliper* c, Channel* channel)
         : m_enable_multiplex(false),
+          m_disable_multiplex(false),
           m_num_eventsets(0),
           m_num_event_mismatch(0),
           m_num_failed_acquire(0),
@@ -450,7 +451,8 @@ class PapiService
         ++s_num_instances;
         PapiService* instance = new PapiService(c, channel);
 
-        instance->m_enable_multiplex = cfg.get("enable_multiplexing").to_bool();
+        instance->m_enable_multiplex  = cfg.get("enable_multiplexing").to_bool();
+        instance->m_disable_multiplex = cfg.get("disable_multiplexing").to_bool();
 
         if (!(instance->setup_event_info(c, eventlist) && instance->setup_thread_eventsets(c))) {
             Log(0).stream() << channel->name() << ": papi: Failed to initialize event sets, dropping papi service"
@@ -494,13 +496,21 @@ const char* PapiService::s_spec = R"json(
   "name": "counters",
   "description": "List of PAPI events to record",
   "type": "string"
- },{
+ },
+ {
   "name": "enable_multiplexing",
   "description": "Always enable multiplexing",
   "type": "bool",
   "value": "False"
+ },
+ {
+  "name": "disable_multiplexing",
+  "description": "Always disable multiplexing",
+  "type": "bool",
+  "value": "False"
  }
-]}
+]
+}
 )json";
 
 } // namespace
diff --git a/src/services/topdown/HaswellTopdown.cpp b/src/services/topdown/HaswellTopdown.cpp
index a04551c8..f149a6c5 100644
--- a/src/services/topdown/HaswellTopdown.cpp
+++ b/src/services/topdown/HaswellTopdown.cpp
@@ -39,6 +39,8 @@ HaswellTopdown::HaswellTopdown(IntelTopdownLevel level)
            "frontend_bandwidth", "memory_bound", "core_bound", "ext_mem_bound",
            "l1_bound", "l2_bound", "l3_bound"}) {}
 
+bool HaswellTopdown::check_for_disabled_multiplex() const { return false; }
+
 std::vector<Entry>
 HaswellTopdown::compute_toplevel(const std::vector<Entry> &rec) {
   std::vector<Entry> ret;
diff --git a/src/services/topdown/HaswellTopdown.h b/src/services/topdown/HaswellTopdown.h
index 39622d89..5ca0a9be 100644
--- a/src/services/topdown/HaswellTopdown.h
+++ b/src/services/topdown/HaswellTopdown.h
@@ -12,6 +12,8 @@ class HaswellTopdown : public TopdownCalculator {
 
   virtual ~HaswellTopdown() = default;
 
+  virtual bool check_for_disabled_multiplex() const override;
+
   virtual std::vector<Entry>
   compute_toplevel(const std::vector<Entry> &rec) override;
 
diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp
index 507a7648..04eabf1b 100644
--- a/src/services/topdown/IntelTopdown.cpp
+++ b/src/services/topdown/IntelTopdown.cpp
@@ -172,6 +172,10 @@ class IntelTopdown
 #endif
 
         channel->config().set("CALI_PAPI_COUNTERS", calculator->get_counters());
+        // Some PAPI counters for topdown (particularly on SPR) don't play nice
+        // with PAPI multiplexing. Ask the TopdownCalculator class whether we need
+        // to disable multiplexing for the corresponding architecture.
+        channel->config().set("CALI_PAPI_DISABLE_MULTIPLEXING", calculator->check_for_disabled_multiplex());
 
         if (!cali::services::register_service(c, channel, "papi")) {
             Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
diff --git a/src/services/topdown/SapphireRapidsTopdown.cpp b/src/services/topdown/SapphireRapidsTopdown.cpp
index 457489b3..1739e144 100644
--- a/src/services/topdown/SapphireRapidsTopdown.cpp
+++ b/src/services/topdown/SapphireRapidsTopdown.cpp
@@ -34,6 +34,10 @@ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
            "frontend_bandwidth", "memory_bound", "core_bound", "light_ops",
            "heavy_ops"}) {}
 
+bool SapphireRapidsTopdown::check_for_disabled_multiplex() const {
+  return true;
+}
+
 std::vector<Entry>
 SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
   std::vector<Entry> ret;
diff --git a/src/services/topdown/SapphireRapidsTopdown.h b/src/services/topdown/SapphireRapidsTopdown.h
index 5038305b..8fc75282 100644
--- a/src/services/topdown/SapphireRapidsTopdown.h
+++ b/src/services/topdown/SapphireRapidsTopdown.h
@@ -12,6 +12,8 @@ class SapphireRapidsTopdown : public TopdownCalculator {
 
   virtual ~SapphireRapidsTopdown() = default;
 
+  virtual bool check_for_disabled_multiplex() const override;
+
   virtual std::vector<Entry>
   compute_toplevel(const std::vector<Entry> &rec) override;
 
diff --git a/src/services/topdown/TopdownCalculator.h b/src/services/topdown/TopdownCalculator.h
index eb40043a..e478d723 100644
--- a/src/services/topdown/TopdownCalculator.h
+++ b/src/services/topdown/TopdownCalculator.h
@@ -38,6 +38,8 @@ class TopdownCalculator {
 
   virtual ~TopdownCalculator() = default;
 
+  virtual bool check_for_disabled_multiplex() const = 0;
+
   virtual std::vector<Entry>
   compute_toplevel(const std::vector<Entry> &rec) = 0;
 

From 26f46a39b52a47e7ba3265873fa83c7410d93207 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Mon, 7 Oct 2024 09:19:46 -0700
Subject: [PATCH 06/11] Checks whether PAPI uses rdpmc on SPR in the topdown
 service

---
 src/services/topdown/CMakeLists.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt
index 78a8bef5..bf5ea84e 100644
--- a/src/services/topdown/CMakeLists.txt
+++ b/src/services/topdown/CMakeLists.txt
@@ -4,6 +4,21 @@ set(CALIPER_TOPDOWN_SOURCES
   HaswellTopdown.cpp
   SapphireRapidsTopdown.cpp)
 
+if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids") # build-time check only; emits warnings, never fails the build
+  if (NOT EXISTS ${PAPI_PREFIX}/bin/papi_component_avail)
+    message(WARNING "Cannot check if PAPI uses rdpmc. Note that the topdown service will not work correctly on Sapphire Rapids if rdpmc is enabled. This will be fixed by a future version of PAPI.")
+  else ()
+    execute_process(
+      COMMAND ${PAPI_PREFIX}/bin/papi_component_avail
+      OUTPUT_VARIABLE CALIPER_TOPDOWN_PAPI_COMPONENTS
+    )
+    string(FIND "${CALIPER_TOPDOWN_PAPI_COMPONENTS}" "Fast counter read (rdpmc): yes" CALIPER_TOPDOWN_PAPI_USES_RDPMC)
+    if (NOT CALIPER_TOPDOWN_PAPI_USES_RDPMC EQUAL -1) # string(FIND) yields -1 when the text is absent
+      message(WARNING "Detected that PAPI uses rdpmc to read counters. The topdown service will not work correctly on Sapphire Rapids if rdpmc is enabled. This will be fixed by a future version of PAPI.")
+    endif ()
+  endif()
+endif ()
+
 add_library(caliper-topdown OBJECT ${CALIPER_TOPDOWN_SOURCES})
 
 add_service_objlib("caliper-topdown")

From 0d98d08ad3fabe73c67fac49cd17add263c91e90 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Tue, 8 Oct 2024 06:53:42 -0700
Subject: [PATCH 07/11] Reworks SPR topdown implementation to use rdpmc-style
 values instead of raw counter values

---
 .../topdown/SapphireRapidsTopdown.cpp         | 178 ++++++------------
 1 file changed, 60 insertions(+), 118 deletions(-)

diff --git a/src/services/topdown/SapphireRapidsTopdown.cpp b/src/services/topdown/SapphireRapidsTopdown.cpp
index 1739e144..a7e55bcf 100644
--- a/src/services/topdown/SapphireRapidsTopdown.cpp
+++ b/src/services/topdown/SapphireRapidsTopdown.cpp
@@ -2,6 +2,21 @@
 
 #include <algorithm>
 
+#define RETIRING_OFFSET 0
+#define BAD_SPEC_OFFSET 1
+#define FE_BOUND_OFFSET 2
+#define BE_BOUND_OFFSET 3
+
+#define HEAVY_OPS_OFFSET 4
+#define BR_MISPRED_OFFSET 5
+#define FETCH_LAT_OFFSET 6
+#define MEM_BOUND_OFFSET 7
+
+static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
+                                               uint64_t offset) {
+  return (double)((rdpmc_value >> (offset * 8)) & 0xff) / 0xff;
+}
+
 namespace cali {
 namespace topdown {
 
@@ -10,22 +25,10 @@ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
           level,
           // top_counters
           "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING",
+          ",perf::topdown-retiring",
           // all_counters
           "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING"
-          ",perf_raw::r8400"  // topdown-heavy-ops
-          ",perf_raw::r8500"  // topdown-br-mispredict
-          ",perf_raw::r8600"  // topdown-fetch-lat
-          ",perf_raw::r8700", // topdown-mem-bound
+          ",perf::topdown-retiring",
           // res_top
           {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
           // res_all
@@ -44,43 +47,29 @@ SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_int_misc_uop_dropping.empty() ||
-                       v_slots_or_info_thread_slots.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
   // Check if all Variants are greater than 0 when casted to doubles (use
   // .to_double())
-  bool is_nonzero =
-      v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
-      v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
-      v_int_misc_uop_dropping.to_double() > 0.0 &&
-      v_slots_or_info_thread_slots.to_double() > 0.0;
+  bool is_nonzero = v_tma_metrics.to_uint() > 0;
 
   // Check if bad values were obtained
   if (is_incomplete || !is_nonzero)
     return ret;
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+
+  double retiring =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+  double frontend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+  double backend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
   double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
 
   // Add toplevel metrics to vector of Entry
   ret.reserve(4);
@@ -106,30 +95,22 @@ SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_heavy_ops.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
-                     (0 * v_slots_or_info_thread_slots.to_double());
+  double retiring =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+  double heavy_ops =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
   double light_ops = std::max(0.0, retiring - heavy_ops);
 
   // Add toplevel metrics to vector of Entry
@@ -152,30 +133,22 @@ SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_memory_bound.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
-                        (0 * v_slots_or_info_thread_slots.to_double());
+  double backend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
+  double memory_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
   double core_bound = std::max(0.0, backend_bound - memory_bound);
 
   // Add toplevel metrics to vector of Entry
@@ -198,35 +171,22 @@ SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
   bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-
-  double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
-                         (v_int_misc_uop_dropping.to_double() /
-                          v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
+  double frontend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+  double fetch_latency =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
   double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
 
   // Add toplevel metrics to vector of Entry
@@ -249,40 +209,22 @@ SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
   bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
-  double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
-                             (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
+  double branch_mispredict = get_tma_percent_from_rdpmc_value(
+      tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
   double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
 
   // Add toplevel metrics to vector of Entry

From 28ff918b8bff8e90b439676da1896180e75e7d48 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Tue, 8 Oct 2024 08:26:48 -0700
Subject: [PATCH 08/11] Updates option spec for SPR topdown and adds
 instruction comments for making new topdown calculators

---
 src/caliper/controllers/controllers.cpp    | 36 ++++------------------
 src/services/papi/Papi.cpp                 |  5 ++-
 src/services/topdown/CMakeLists.txt        |  6 ++--
 src/services/topdown/IntelTopdown.cpp      | 11 ++++---
 src/services/topdown/TopdownCalculator.cpp |  5 +--
 src/services/topdown/TopdownCalculator.h   | 13 ++++++++
 6 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index 5e70da15..a0c33e73 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -1240,28 +1240,20 @@ const char* builtin_papi_spr_option_specs = R"json(
      "config"      :
      {
        "CALI_PAPI_COUNTERS":
-         "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING"
+         "perf::slots,perf::topdown-retiring"
      },
      "query"  :
      [
       { "level": "local", "select":
        [
         "inclusive_sum(sum#papi.slots) as slots",
-        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
-        "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
-        "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
-        "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
-        "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping"
+        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
        ]
       },
       { "level": "cross", "select":
        [
         "sum(inclusive#sum#papi.slots) as slots",
-        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
-        "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
-        "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
-        "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
-        "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping"
+        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
        ]
       }
      ]
@@ -1275,36 +1267,20 @@ const char* builtin_papi_spr_option_specs = R"json(
      "config"      :
      {
        "CALI_PAPI_COUNTERS":
-         "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING,perf_raw::r8400,perf_raw::r8500,perf_raw::r8600,perf_raw::r8700"
+         "perf::slots,perf::topdown-retiring"
      },
      "query"  :
      [
       { "level": "local", "select":
        [
         "inclusive_sum(sum#papi.slots) as slots",
-        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
-        "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
-        "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
-        "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
-        "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping",
-        "inclusive_sum(sum#papi.perf_raw::r8400) as topdown_heavy_ops",
-        "inclusive_sum(sum#papi.perf_raw::r8500) as topdown_br_mispredict",
-        "inclusive_sum(sum#papi.perf_raw::r8600) as topdown_fetch_lat",
-        "inclusive_sum(sum#papi.perf_raw::r8700) as topdown_mem_bound"
+        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
        ]
       },
       { "level": "cross", "select":
        [
         "sum(inclusive#sum#papi.slots) as slots",
-        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
-        "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
-        "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
-        "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
-        "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping",
-        "sum(inclusive#sum#papi.perf_raw::r8400) as topdown_heavy_ops",
-        "sum(inclusive#sum#papi.perf_raw::r8500) as topdown_br_mispredict",
-        "sum(inclusive#sum#papi.perf_raw::r8600) as topdown_fetch_lat",
-        "sum(inclusive#sum#papi.perf_raw::r8700) as topdown_mem_bound"
+        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
        ]
       }
      ]
diff --git a/src/services/papi/Papi.cpp b/src/services/papi/Papi.cpp
index eb111af3..ed9ea6ff 100644
--- a/src/services/papi/Papi.cpp
+++ b/src/services/papi/Papi.cpp
@@ -173,9 +173,8 @@ class PapiService
             if (!m_disable_multiplex && cpi
                 && (num > 4 /* magic number for Intel counter support :-( */ || m_enable_multiplex)) {
                 if (Log::verbosity() >= 2)
-                    Log(2).stream() << "papi: Initializing multiplex support for
-                        component "
-                                    << p.first << " (" << cpi->name << ")" << std::endl;
+                    Log(2).stream() << "papi: Initializing multiplex support for component " << p.first << " ("
+                                    << cpi->name << ")" << std::endl;
 
                 ret = PAPI_assign_eventset_component(eventset, p.first);
                 if (ret != PAPI_OK)
diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt
index bf5ea84e..d5dd230c 100644
--- a/src/services/topdown/CMakeLists.txt
+++ b/src/services/topdown/CMakeLists.txt
@@ -1,12 +1,12 @@
 set(CALIPER_TOPDOWN_SOURCES
   IntelTopdown.cpp
-  TopdownCalulator.cpp
+  TopdownCalculator.cpp
   HaswellTopdown.cpp
   SapphireRapidsTopdown.cpp)
 
 if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids")
   if (NOT EXISTS ${PAPI_PREFIX}/bin/papi_coponent_avail)
-    message(WARNING "Cannot check if PAPI uses rdpmc. Note that the topdown service will not work correctly on Sapphire Rapids if rdpmc is enabled. This will be fixed by a future version of PAPI.")
+    message(WARNING "Cannot check if PAPI uses rdpmc. Note that the topdown service will not work correctly on Sapphire Rapids if rdpmc is NOT enabled. This will be fixed by a future version of PAPI.")
   else ()
     execute_process(
       COMMAND ${PAPI_PREFIX}/bin/papi_coponent_avail
@@ -14,7 +14,7 @@ if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids")
     )
     string(FIND ${CALIPER_TOPDOWN_PAPI_COMPONENTS} "Fast counter read (rdpmc): yes" CALIPER_TOPDOWN_PAPI_USES_RDPMC)
     if (CALIPER_TOPDOWN_PAPI_USES_RDPMC EQUAL "-1")
-      message(WARNING "Detected that PAPI uses rdpmc to read counters. The topdown service will not work correctly on Sapphire Rapids if rdpmc is enabled. This will be fixed by a future version of PAPI.")
+      message(WARNING "Detected that PAPI does not use rdpmc to read counters. The topdown service will not work correctly on Sapphire Rapids if rdpmc is NOT enabled. This will be fixed by a future version of PAPI.")
     endif ()
   endif()
 endif ()
diff --git a/src/services/topdown/IntelTopdown.cpp b/src/services/topdown/IntelTopdown.cpp
index 04eabf1b..840dcef1 100644
--- a/src/services/topdown/IntelTopdown.cpp
+++ b/src/services/topdown/IntelTopdown.cpp
@@ -59,7 +59,7 @@ class IntelTopdown
             ++num_top_computed;
         }
 
-        if (m_level == All) {
+        if (m_level == cali::topdown::All) {
             result = m_calculator->compute_backend_bound(rec);
 
             if (result.size() != m_calculator->get_num_expected_backend_bound()) {
@@ -146,13 +146,13 @@ class IntelTopdown
 
     static void intel_topdown_register(Caliper* c, Channel* channel)
     {
-        cali::topdown::IntelTopdownLevel level = Top;
+        cali::topdown::IntelTopdownLevel level = cali::topdown::Top;
 
         auto        config = services::init_config_from_spec(channel->config(), s_spec);
         std::string lvlcfg = config.get("level").to_string();
 
         if (lvlcfg == "all") {
-            level = All;
+            level = cali::topdown::All;
         } else if (lvlcfg != "top") {
             Log(0).stream() << channel->name() << ": topdown: Unknown level \"" << lvlcfg << "\", skipping topdown"
                             << std::endl;
@@ -175,7 +175,10 @@ class IntelTopdown
         // Some PAPI counters for topdown (particularly on SPR) don't play nice
         // with PAPI multiplexing. Ask the TopdownCalculator class whether we need
         // to disable multiplexing for the corresponding architecture.
-        channel->config().set("CALI_PAPI_DISABLE_MULTIPLEXING", calculator->check_for_disabled_multiplex());
+        channel->config().set(
+            "CALI_PAPI_DISABLE_MULTIPLEXING",
+            calculator->check_for_disabled_multiplex() ? "true" : "false"
+        );
 
         if (!cali::services::register_service(c, channel, "papi")) {
             Log(0).stream() << channel->name() << ": topdown: Unable to register papi service, skipping topdown"
diff --git a/src/services/topdown/TopdownCalculator.cpp b/src/services/topdown/TopdownCalculator.cpp
index d1c8909b..bbfa386f 100644
--- a/src/services/topdown/TopdownCalculator.cpp
+++ b/src/services/topdown/TopdownCalculator.cpp
@@ -1,6 +1,7 @@
 #include "TopdownCalculator.h"
 
 #include "caliper/common/Log.h"
+#include "caliper/common/StringConverter.h"
 
 #include <algorithm>
 
@@ -47,9 +48,9 @@ bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface &db) {
   for (const auto &s : counters) {
     Attribute attr = db.get_attribute(std::string("sum#papi.") + s);
 
-    if (attr == Attribute::invalid)
+    if (!attr)
       attr = db.get_attribute(std::string("papi.") + s);
-    if (attr == Attribute::invalid) {
+    if (!attr) {
       Log(0).stream() << "topdown: " << s << " counter attribute not found!"
                       << std::endl;
       return false;
diff --git a/src/services/topdown/TopdownCalculator.h b/src/services/topdown/TopdownCalculator.h
index e478d723..9841580e 100644
--- a/src/services/topdown/TopdownCalculator.h
+++ b/src/services/topdown/TopdownCalculator.h
@@ -6,6 +6,19 @@
 #include <map>
 #include <vector>
 
+// clang-format off
+/* How to create a new topdown calculation plugin:
+ *
+ * Step 1: Create a subclass of TopdownCalculator implementing the calculations for the new
+ *         architecture (see the Haswell and SPR subclasses as examples)
+ * Step 2: Edit IntelTopdown::intel_topdown_register in IntelTopdown.cpp to add logic for
+ *         creating an instance of your subclass (edits should be made around line 165)
+ * Step 3: Edit src/services/topdown/CMakeLists.txt to include the source file for your new subclass
+ * Step 4: Edit the 'get_builtin_option_specs' function in src/caliper/controllers/controllers.cpp
+ *         to add the appropriate option spec for your architecture in the topdown service
+ */
+// clang-format on
+
 namespace cali {
 namespace topdown {
 

From eec5ea906e4d98039f5cbfef44282ff24c5013e6 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Tue, 8 Oct 2024 16:27:47 -0400
Subject: [PATCH 09/11] Adds a CMake flag to let users tell us if PAPI is built
 to use rdpmc or not

---
 CMakeLists.txt                                |   2 +
 caliper-config.h.in                           |   3 +
 src/caliper/controllers/controllers.cpp       | 383 ++++++++++++------
 src/services/topdown/CMakeLists.txt           |  24 +-
 ...wn.cpp => SapphireRapidsTopdown_rdpmc.cpp} |   0
 .../topdown/SapphireRapidsTopdown_read.cpp    | 303 ++++++++++++++
 6 files changed, 588 insertions(+), 127 deletions(-)
 rename src/services/topdown/{SapphireRapidsTopdown.cpp => SapphireRapidsTopdown_rdpmc.cpp} (100%)
 create mode 100644 src/services/topdown/SapphireRapidsTopdown_read.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 496d4c2d..70f21a9e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,6 +82,8 @@ add_caliper_option(WITH_VARIORUM  "Enable Variorum support" FALSE)
 add_caliper_option(WITH_UMPIRE    "Enable Umpire statistics support" FALSE)
 add_caliper_option(WITH_CRAYPAT   "Enable CrayPAT region forwarding support" FALSE)
 add_caliper_option(WITH_LDMS      "Enable LDMS forwarder" FALSE)
+add_caliper_option(WITH_PAPI_RDPMC "Declare that PAPI is built to use rdpmc for reading counters. Does nothing if PAPI support is not enabled." TRUE)
+
 
 set(WITH_ARCH "" CACHE STRING "Enable features specific to the provided archspec CPU architecture name")
 if (NOT WITH_ARCH STREQUAL "")
diff --git a/caliper-config.h.in b/caliper-config.h.in
index d5602e97..366c407e 100644
--- a/caliper-config.h.in
+++ b/caliper-config.h.in
@@ -27,6 +27,9 @@
 #cmakedefine CALIPER_HAVE_CRAYPAT
 #cmakedefine CALIPER_HAVE_LDMS
 #cmakedefine CALIPER_HAVE_ARCH "@CALIPER_HAVE_ARCH@"
+#ifdef CALIPER_HAVE_PAPI
+#cmakedefine CALIPER_WITH_PAPI_RDPMC
+#endif
 
 #cmakedefine CALIPER_REDUCED_CONSTEXPR_USAGE
 
diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index a0c33e73..cbb08bcc 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -1160,132 +1160,289 @@ const char* builtin_papi_hsw_option_specs = R"json(
 ]
 )json";
 
+#ifdef CALIPER_WITH_PAPI_RDPMC
 const char* builtin_papi_spr_option_specs = R"json(
-    {
-     "name"        : "topdown.toplevel",
-     "description" : "Top-down analysis for Intel CPUs (top level)",
-     "type"        : "bool",
-     "category"    : "metric",
-     "services"    : [ "topdown" ],
-     "config"      : { "CALI_TOPDOWN_LEVEL": "top" },
-     "query"  :
+[
+  {
+   "name"        : "topdown.toplevel",
+   "description" : "Top-down analysis for Intel CPUs (top level)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "topdown" ],
+   "config"      : { "CALI_TOPDOWN_LEVEL": "top" },
+   "query"  :
+   [
+    { "level": "local", "select":
      [
-      { "level": "local", "select":
-       [
-        "any(topdown.retiring) as \"Retiring\"",
-        "any(topdown.backend_bound) as \"Backend bound\"",
-        "any(topdown.frontend_bound) as \"Frontend bound\"",
-        "any(topdown.bad_speculation) as \"Bad speculation\""
-       ]
-      },
-      { "level": "cross", "select":
-       [
-        "any(any#topdown.retiring) as \"Retiring\"",
-        "any(any#topdown.backend_bound) as \"Backend bound\"",
-        "any(any#topdown.frontend_bound) as \"Frontend bound\"",
-        "any(any#topdown.bad_speculation) as \"Bad speculation\""
-       ]
-      }
+      "any(topdown.retiring) as \"Retiring\"",
+      "any(topdown.backend_bound) as \"Backend bound\"",
+      "any(topdown.frontend_bound) as \"Frontend bound\"",
+      "any(topdown.bad_speculation) as \"Bad speculation\""
      ]
     },
-    {
-     "name"        : "topdown.all",
-     "description" : "Top-down analysis for Intel CPUs (all levels)",
-     "type"        : "bool",
-     "category"    : "metric",
-     "services"    : [ "topdown" ],
-     "config"      : { "CALI_TOPDOWN_LEVEL": "all" },
-     "query"  :
+    { "level": "cross", "select":
+     [
+      "any(any#topdown.retiring) as \"Retiring\"",
+      "any(any#topdown.backend_bound) as \"Backend bound\"",
+      "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+      "any(any#topdown.bad_speculation) as \"Bad speculation\""
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown.all",
+   "description" : "Top-down analysis for Intel CPUs (all levels)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "topdown" ],
+   "config"      : { "CALI_TOPDOWN_LEVEL": "all" },
+   "query"  :
+   [
+    { "level": "local", "select":
+     [
+      "any(topdown.retiring) as \"Retiring\"",
+      "any(topdown.backend_bound) as \"Backend bound\"",
+      "any(topdown.frontend_bound) as \"Frontend bound\"",
+      "any(topdown.bad_speculation) as \"Bad speculation\"",
+      "any(topdown.branch_mispredict) as \"Branch mispredict\"",
+      "any(topdown.machine_clears) as \"Machine clears\"",
+      "any(topdown.frontend_latency) as \"Frontend latency\"",
+      "any(topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+      "any(topdown.memory_bound) as \"Memory bound\"",
+      "any(topdown.core_bound) as \"Core bound\"",
+      "any(topdown.light_ops) as \"Light operations\"",
+      "any(topdown.heavy_ops) as \"Heavy operations\""
+     ]
+    },
+    { "level": "cross", "select":
+     [
+      "any(any#topdown.retiring) as \"Retiring\"",
+      "any(any#topdown.backend_bound) as \"Backend bound\"",
+      "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+      "any(any#topdown.bad_speculation) as \"Bad speculation\"",
+      "any(any#topdown.branch_mispredict) as \"Branch mispredict\"",
+      "any(any#topdown.machine_clears) as \"Machine clears\"",
+      "any(any#topdown.frontend_latency) as \"Frontend latency\"",
+      "any(any#topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+      "any(any#topdown.memory_bound) as \"Memory bound\"",
+      "any(any#topdown.core_bound) as \"Core bound\"",
+      "any(any#topdown.light_ops) as \"Light operations\"",
+      "any(any#topdown.heavy_ops) as \"Heavy operations\""
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown-counters.toplevel",
+   "description" : "Raw counter values for Intel top-down analysis (top level)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "papi" ],
+   "config"      :
+   {
+     "CALI_PAPI_COUNTERS":
+       "perf::slots,perf::topdown-retiring"
+   },
+   "query"  :
+   [
+    { "level": "local", "select":
      [
-      { "level": "local", "select":
-       [
-        "any(topdown.retiring) as \"Retiring\"",
-        "any(topdown.backend_bound) as \"Backend bound\"",
-        "any(topdown.frontend_bound) as \"Frontend bound\"",
-        "any(topdown.bad_speculation) as \"Bad speculation\"",
-        "any(topdown.branch_mispredict) as \"Branch mispredict\"",
-        "any(topdown.machine_clears) as \"Machine clears\"",
-        "any(topdown.frontend_latency) as \"Frontend latency\"",
-        "any(topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
-        "any(topdown.memory_bound) as \"Memory bound\"",
-        "any(topdown.core_bound) as \"Core bound\"",
-        "any(topdown.light_ops) as \"Light operations\"",
-        "any(topdown.heavy_ops) as \"Heavy operations\""
-       ]
-      },
-      { "level": "cross", "select":
-       [
-        "any(any#topdown.retiring) as \"Retiring\"",
-        "any(any#topdown.backend_bound) as \"Backend bound\"",
-        "any(any#topdown.frontend_bound) as \"Frontend bound\"",
-        "any(any#topdown.bad_speculation) as \"Bad speculation\"",
-        "any(any#topdown.branch_mispredict) as \"Branch mispredict\"",
-        "any(any#topdown.machine_clears) as \"Machine clears\"",
-        "any(any#topdown.frontend_latency) as \"Frontend latency\"",
-        "any(any#topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
-        "any(any#topdown.memory_bound) as \"Memory bound\"",
-        "any(any#topdown.core_bound) as \"Core bound\"",
-        "any(any#topdown.light_ops) as \"Light operations\"",
-        "any(any#topdown.heavy_ops) as \"Heavy operations\""
-       ]
-      }
+      "inclusive_sum(sum#papi.slots) as slots",
+      "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
      ]
     },
-    {
-     "name"        : "topdown-counters.toplevel",
-     "description" : "Raw counter values for Intel top-down analysis (top level)",
-     "type"        : "bool",
-     "category"    : "metric",
-     "services"    : [ "papi" ],
-     "config"      :
-     {
-       "CALI_PAPI_COUNTERS":
-         "perf::slots,perf::topdown-retiring"
-     },
-     "query"  :
+    { "level": "cross", "select":
+     [
+      "sum(inclusive#sum#papi.slots) as slots",
+      "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown-counters.all",
+   "description" : "Raw counter values for Intel top-down analysis (all levels)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "papi" ],
+   "config"      :
+   {
+     "CALI_PAPI_COUNTERS":
+       "perf::slots,perf::topdown-retiring"
+   },
+   "query"  :
+   [
+    { "level": "local", "select":
      [
-      { "level": "local", "select":
-       [
-        "inclusive_sum(sum#papi.slots) as slots",
-        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
-       ]
-      },
-      { "level": "cross", "select":
-       [
-        "sum(inclusive#sum#papi.slots) as slots",
-        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
-       ]
-      }
+      "inclusive_sum(sum#papi.slots) as slots",
+      "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
      ]
     },
-    {
-     "name"        : "topdown-counters.all",
-     "description" : "Raw counter values for Intel top-down analysis (all levels)",
-     "type"        : "bool",
-     "category"    : "metric",
-     "services"    : [ "papi" ],
-     "config"      :
-     {
-       "CALI_PAPI_COUNTERS":
-         "perf::slots,perf::topdown-retiring"
-     },
-     "query"  :
+    { "level": "cross", "select":
      [
-      { "level": "local", "select":
-       [
-        "inclusive_sum(sum#papi.slots) as slots",
-        "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring"
-       ]
-      },
-      { "level": "cross", "select":
-       [
-        "sum(inclusive#sum#papi.slots) as slots",
-        "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
-       ]
-      }
+      "sum(inclusive#sum#papi.slots) as slots",
+      "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring"
      ]
     }
-  )json";
+   ]
+  }
+]
+)json";
+#else
+const char* builtin_papi_spr_option_specs = R"json(
+[
+  {
+   "name"        : "topdown.toplevel",
+   "description" : "Top-down analysis for Intel CPUs (top level)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "topdown" ],
+   "config"      : { "CALI_TOPDOWN_LEVEL": "top" },
+   "query"  :
+   [
+    { "level": "local", "select":
+     [
+      "any(topdown.retiring) as \"Retiring\"",
+      "any(topdown.backend_bound) as \"Backend bound\"",
+      "any(topdown.frontend_bound) as \"Frontend bound\"",
+      "any(topdown.bad_speculation) as \"Bad speculation\""
+     ]
+    },
+    { "level": "cross", "select":
+     [
+      "any(any#topdown.retiring) as \"Retiring\"",
+      "any(any#topdown.backend_bound) as \"Backend bound\"",
+      "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+      "any(any#topdown.bad_speculation) as \"Bad speculation\""
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown.all",
+   "description" : "Top-down analysis for Intel CPUs (all levels)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "topdown" ],
+   "config"      : { "CALI_TOPDOWN_LEVEL": "all" },
+   "query"  :
+   [
+    { "level": "local", "select":
+     [
+      "any(topdown.retiring) as \"Retiring\"",
+      "any(topdown.backend_bound) as \"Backend bound\"",
+      "any(topdown.frontend_bound) as \"Frontend bound\"",
+      "any(topdown.bad_speculation) as \"Bad speculation\"",
+      "any(topdown.branch_mispredict) as \"Branch mispredict\"",
+      "any(topdown.machine_clears) as \"Machine clears\"",
+      "any(topdown.frontend_latency) as \"Frontend latency\"",
+      "any(topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+      "any(topdown.memory_bound) as \"Memory bound\"",
+      "any(topdown.core_bound) as \"Core bound\"",
+      "any(topdown.light_ops) as \"Light operations\"",
+      "any(topdown.heavy_ops) as \"Heavy operations\""
+     ]
+    },
+    { "level": "cross", "select":
+     [
+      "any(any#topdown.retiring) as \"Retiring\"",
+      "any(any#topdown.backend_bound) as \"Backend bound\"",
+      "any(any#topdown.frontend_bound) as \"Frontend bound\"",
+      "any(any#topdown.bad_speculation) as \"Bad speculation\"",
+      "any(any#topdown.branch_mispredict) as \"Branch mispredict\"",
+      "any(any#topdown.machine_clears) as \"Machine clears\"",
+      "any(any#topdown.frontend_latency) as \"Frontend latency\"",
+      "any(any#topdown.frontend_bandwidth) as \"Frontend bandwidth\"",
+      "any(any#topdown.memory_bound) as \"Memory bound\"",
+      "any(any#topdown.core_bound) as \"Core bound\"",
+      "any(any#topdown.light_ops) as \"Light operations\"",
+      "any(any#topdown.heavy_ops) as \"Heavy operations\""
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown-counters.toplevel",
+   "description" : "Raw counter values for Intel top-down analysis (top level)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "papi" ],
+   "config"      :
+   {
+     "CALI_PAPI_COUNTERS":
+       "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING"
+   },
+   "query"  :
+   [
+    { "level": "local", "select":
+     [
+      "inclusive_sum(sum#papi.perf::slots) as slots",
+      "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
+      "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+      "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+      "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+      "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping"
+     ]
+    },
+    { "level": "cross", "select":
+     [
+      "sum(inclusive#sum#papi.perf::slots) as slots",
+      "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
+      "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+      "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+      "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+      "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping"
+     ]
+    }
+   ]
+  },
+  {
+   "name"        : "topdown-counters.all",
+   "description" : "Raw counter values for Intel top-down analysis (all levels)",
+   "type"        : "bool",
+   "category"    : "metric",
+   "services"    : [ "papi" ],
+   "config"      :
+   {
+     "CALI_PAPI_COUNTERS":
+       "perf::slots,perf::topdown-retiring,perf::topdown-bad-spec,perf::topdown-fe-bound,perf::topdown-be-bound,INT_MISC:UOP_DROPPING,perf_raw::r8400,perf_raw::r8500,perf_raw::r8600,perf_raw::r8700"
+   },
+   "query"  :
+   [
+    { "level": "local", "select":
+     [
+      "inclusive_sum(sum#papi.perf::slots) as slots",
+      "inclusive_sum(sum#papi.perf::topdown-retiring) as topdown_retiring",
+      "inclusive_sum(sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+      "inclusive_sum(sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+      "inclusive_sum(sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+      "inclusive_sum(sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping",
+      "inclusive_sum(sum#papi.perf_raw::r8400) as topdown_heavy_ops",
+      "inclusive_sum(sum#papi.perf_raw::r8500) as topdown_br_mispredict",
+      "inclusive_sum(sum#papi.perf_raw::r8600) as topdown_fetch_lat",
+      "inclusive_sum(sum#papi.perf_raw::r8700) as topdown_mem_bound"
+     ]
+    },
+    { "level": "cross", "select":
+     [
+      "sum(inclusive#sum#papi.perf::slots) as slots",
+      "sum(inclusive#sum#papi.perf::topdown-retiring) as topdown_retiring",
+      "sum(inclusive#sum#papi.perf::topdown-bad-spec) as topdown_bad_spec",
+      "sum(inclusive#sum#papi.perf::topdown-fe-bound) as topdown_fe_bound",
+      "sum(inclusive#sum#papi.perf::topdown-be-bound) as topdown_be_bound",
+      "sum(inclusive#sum#papi.INT_MISC:UOP_DROPPING) as int_mist:uop_dropping",
+      "sum(inclusive#sum#papi.perf_raw::r8400) as topdown_heavy_ops",
+      "sum(inclusive#sum#papi.perf_raw::r8500) as topdown_br_mispredict",
+      "sum(inclusive#sum#papi.perf_raw::r8600) as topdown_fetch_lat",
+      "sum(inclusive#sum#papi.perf_raw::r8700) as topdown_mem_bound"
+     ]
+    }
+   ]
+  }
+]
+)json";
+#endif
 
 const char* builtin_kokkos_option_specs = R"json(
 [
diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt
index d5dd230c..5fc9c537 100644
--- a/src/services/topdown/CMakeLists.txt
+++ b/src/services/topdown/CMakeLists.txt
@@ -1,22 +1,18 @@
 set(CALIPER_TOPDOWN_SOURCES
   IntelTopdown.cpp
   TopdownCalculator.cpp
-  HaswellTopdown.cpp
-  SapphireRapidsTopdown.cpp)
+  HaswellTopdown.cpp)
 
-if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids")
-  if (NOT EXISTS ${PAPI_PREFIX}/bin/papi_coponent_avail)
-    message(WARNING "Cannot check if PAPI uses rdpmc. Note that the topdown service will not work correctly on Sapphire Rapids if rdpmc is NOT enabled. This will be fixed by a future version of PAPI.")
-  else ()
-    execute_process(
-      COMMAND ${PAPI_PREFIX}/bin/papi_coponent_avail
-      OUTPUT_VARIABLE CALIPER_TOPDOWN_PAPI_COMPONENTS
-    )
-    string(FIND ${CALIPER_TOPDOWN_PAPI_COMPONENTS} "Fast counter read (rdpmc): yes" CALIPER_TOPDOWN_PAPI_USES_RDPMC)
-    if (CALIPER_TOPDOWN_PAPI_USES_RDPMC EQUAL "-1")
-      message(WARNING "Detected that PAPI does not use rdpmc to read counters. The topdown service will not work correctly on Sapphire Rapids if rdpmc is NOT enabled. This will be fixed by a future version of PAPI.")
-    endif ()
+
+if (CALIPER_WITH_PAPI_RDPMC)
+  message(STATUS "PAPI uses rdpmc")
+  if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids")
+    message(WARNING "Trying to use rdpmc for topdown on Sapphire Rapids will likely result in invalid values!")
   endif()
+  list(APPEND CALIPER_TOPDOWN_SOURCES SapphireRapidsTopdown_rdpmc.cpp)
+else()
+  message(STATUS "PAPI does not use rdpmc")
+  list(APPEND CALIPER_TOPDOWN_SOURCES SapphireRapidsTopdown_read.cpp)
 endif ()
 
 add_library(caliper-topdown OBJECT ${CALIPER_TOPDOWN_SOURCES})
diff --git a/src/services/topdown/SapphireRapidsTopdown.cpp b/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp
similarity index 100%
rename from src/services/topdown/SapphireRapidsTopdown.cpp
rename to src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp
diff --git a/src/services/topdown/SapphireRapidsTopdown_read.cpp b/src/services/topdown/SapphireRapidsTopdown_read.cpp
new file mode 100644
index 00000000..1739e144
--- /dev/null
+++ b/src/services/topdown/SapphireRapidsTopdown_read.cpp
@@ -0,0 +1,303 @@
+#include "SapphireRapidsTopdown.h"
+
+#include <algorithm>
+
+namespace cali {
+namespace topdown {
+
+SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
+    : cali::topdown::TopdownCalculator(
+          level,
+          // top_counters
+          "perf::slots"
+          ",perf::topdown-retiring"
+          ",perf::topdown-bad-spec"
+          ",perf::topdown-fe-bound"
+          ",perf::topdown-be-bound"
+          ",INT_MISC:UOP_DROPPING",
+          // all_counters
+          "perf::slots"
+          ",perf::topdown-retiring"
+          ",perf::topdown-bad-spec"
+          ",perf::topdown-fe-bound"
+          ",perf::topdown-be-bound"
+          ",INT_MISC:UOP_DROPPING"
+          ",perf_raw::r8400"  // topdown-heavy-ops
+          ",perf_raw::r8500"  // topdown-br-mispredict
+          ",perf_raw::r8600"  // topdown-fetch-lat
+          ",perf_raw::r8700", // topdown-mem-bound
+          // res_top
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
+          // res_all
+          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
+           "branch_mispredict", "machine_clears", "frontend_latency",
+           "frontend_bandwidth", "memory_bound", "core_bound", "light_ops",
+           "heavy_ops"}) {}
+
+bool SapphireRapidsTopdown::check_for_disabled_multiplex() const {
+  return true;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_int_misc_uop_dropping.empty() ||
+                       v_slots_or_info_thread_slots.empty();
+  // Check if all Variants are greater than 0 when casted to doubles (use
+  // .to_double())
+  bool is_nonzero =
+      v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
+      v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
+      v_int_misc_uop_dropping.to_double() > 0.0 &&
+      v_slots_or_info_thread_slots.to_double() > 0.0;
+
+  // Check if bad values were obtained
+  if (is_incomplete || !is_nonzero)
+    return ret;
+
+  // Perform toplevel calcs
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(4);
+  ret.push_back(
+      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+  ret.push_back(Entry(m_result_attrs["backend_bound"],
+                      Variant(std::max(backend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["frontend_bound"],
+                      Variant(std::max(frontend_bound, 0.0))));
+  ret.push_back(Entry(m_result_attrs["bad_speculation"],
+                      Variant(std::max(bad_speculation, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const {
+  return 4;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_slots_or_info_thread_slots.empty() ||
+                       v_heavy_ops.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+
+  double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
+                     (0 * v_slots_or_info_thread_slots.to_double());
+  double light_ops = std::max(0.0, retiring - heavy_ops);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(
+      Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
+  ret.push_back(
+      Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
+                       v_bad_spec.empty() || v_retiring.empty() ||
+                       v_slots_or_info_thread_slots.empty() ||
+                       v_memory_bound.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+
+  double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
+                        (0 * v_slots_or_info_thread_slots.to_double());
+  double core_bound = std::max(0.0, backend_bound - memory_bound);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["memory_bound"],
+                      Variant(std::max(memory_bound, 0.0))));
+  ret.push_back(
+      Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete =
+      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
+      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
+      v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+  // Copied from compute_toplevel
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+
+  double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
+                         (v_int_misc_uop_dropping.to_double() /
+                          v_slots_or_info_thread_slots.to_double());
+
+  double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["frontend_latency"],
+                      Variant(std::max(fetch_latency, 0.0))));
+  ret.push_back(Entry(m_result_attrs["frontend_bandwidth"],
+                      Variant(std::max(fetch_bandwidth, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const {
+  return 2;
+}
+
+std::vector<Entry>
+SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
+  std::vector<Entry> ret;
+
+  // Get PAPI metrics for toplevel calculations
+  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
+  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
+  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
+  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
+  Variant v_int_misc_uop_dropping =
+      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
+
+  // Check if any Variant is empty (use .empty())
+  bool is_incomplete =
+      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
+      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
+      v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
+
+  // Check if bad values were obtained
+  if (is_incomplete)
+    return ret;
+
+  // Perform toplevel calcs
+  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
+                         v_fe_bound.to_double() + v_be_bound.to_double());
+
+  double retiring = (v_retiring.to_double() / toplevel_sum) +
+                    (0 * v_slots_or_info_thread_slots.to_double());
+  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
+                          (v_int_misc_uop_dropping.to_double() /
+                           v_slots_or_info_thread_slots.to_double());
+  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
+                         (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+  double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
+                             (0 * v_slots_or_info_thread_slots.to_double());
+  double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
+
+  // Add toplevel metrics to vector of Entry
+  ret.reserve(2);
+  ret.push_back(Entry(m_result_attrs["branch_mispredict"],
+                      Variant(std::max(branch_mispredict, 0.0))));
+  ret.push_back(Entry(m_result_attrs["machine_clears"],
+                      Variant(std::max(machine_clears, 0.0))));
+
+  return ret;
+}
+
+std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const {
+  return 2;
+}
+
+} // namespace topdown
+} // namespace cali
\ No newline at end of file

From 77755903ce89bf221b1891611a7d4bdc37935924 Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Wed, 9 Oct 2024 14:06:20 -0400
Subject: [PATCH 10/11] Disables multiplexing in topdown-counters

---
 CMakeLists.txt                          |  4 ++++
 src/caliper/controllers/controllers.cpp | 19 -------------------
 src/services/topdown/CMakeLists.txt     |  2 +-
 3 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 70f21a9e..f1dd10d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,6 +84,10 @@ add_caliper_option(WITH_CRAYPAT   "Enable CrayPAT region forwarding support" FAL
 add_caliper_option(WITH_LDMS      "Enable LDMS forwarder" FALSE)
 add_caliper_option(WITH_PAPI_RDPMC "Declare that PAPI is built to use rdpmc for reading counters. Does nothing if PAPI support is not enabled." TRUE)
 
+if (WITH_PAPI_RDPMC)
+  set(CALIPER_WITH_PAPI_RDPMC TRUE)
+endif ()
+
 
 set(WITH_ARCH "" CACHE STRING "Enable features specific to the provided archspec CPU architecture name")
 if (NOT WITH_ARCH STREQUAL "")
diff --git a/src/caliper/controllers/controllers.cpp b/src/caliper/controllers/controllers.cpp
index cbb08bcc..80598ab2 100644
--- a/src/caliper/controllers/controllers.cpp
+++ b/src/caliper/controllers/controllers.cpp
@@ -207,25 +207,6 @@ const ConfigManager::ConfigInfo* builtin_controllers_table[] = { &cuda_activity_
                                                                  &spot_controller_info,
                                                                  nullptr };
 
-// Compile-time string comparison
-// Based on code from:
-// https://gist.github.com/ac1dloop/4f7109e8856e5d28e769134bca7d6d7d
-constexpr bool const_strcmp(const char* a, const char* b)
-{
-    // Iterate until one of the strings hits its NULL terminator
-    for (; *a || *b;) {
-        // Check if the current characters in the strings are equal
-        // If not equal, return false
-        // If equal, progress to the next character in the strings
-        if (*a++ != *b++) {
-            return false;
-        }
-    }
-    // If we reach here, every character from the strings were equal,
-    // so we return true
-    return true;
-}
-
 const char* builtin_base_option_specs = R"json(
 [
 {
diff --git a/src/services/topdown/CMakeLists.txt b/src/services/topdown/CMakeLists.txt
index 5fc9c537..a5e59717 100644
--- a/src/services/topdown/CMakeLists.txt
+++ b/src/services/topdown/CMakeLists.txt
@@ -4,7 +4,7 @@ set(CALIPER_TOPDOWN_SOURCES
   HaswellTopdown.cpp)
 
 
-if (CALIPER_WITH_PAPI_RDPMC)
+if (WITH_PAPI_RDPMC)
   message(STATUS "PAPI uses rdpmc")
   if (CALIPER_HAVE_ARCH STREQUAL "sapphirerapids")
     message(WARNING "Trying to use rdpmc for topdown on Sapphire Rapids will likely result in invalid values!")

From f72b2cf7131e6f6c2dad66ff477db680a3322d2e Mon Sep 17 00:00:00 2001
From: Ian Lumsden <lumsden.ian@gmail.com>
Date: Wed, 9 Oct 2024 17:07:08 -0400
Subject: [PATCH 11/11] Adds comments describing the expected behavior of the
 virtual methods in TopdownCalculator

---
 doc/sphinx/PythonSupport.rst                  |  45 ++
 doc/sphinx/build.rst                          | 246 +++++----
 doc/sphinx/index.rst                          |   1 +
 src/services/topdown/HaswellTopdown.cpp       | 400 +++++++-------
 src/services/topdown/HaswellTopdown.h         |  41 +-
 src/services/topdown/SapphireRapidsTopdown.h  |  41 +-
 .../topdown/SapphireRapidsTopdown_rdpmc.cpp   | 329 ++++++------
 .../topdown/SapphireRapidsTopdown_read.cpp    | 493 +++++++++---------
 src/services/topdown/TopdownCalculator.cpp    | 133 ++---
 src/services/topdown/TopdownCalculator.h      | 103 ++--
 10 files changed, 940 insertions(+), 892 deletions(-)
 create mode 100644 doc/sphinx/PythonSupport.rst

diff --git a/doc/sphinx/PythonSupport.rst b/doc/sphinx/PythonSupport.rst
new file mode 100644
index 00000000..cc36928e
--- /dev/null
+++ b/doc/sphinx/PythonSupport.rst
@@ -0,0 +1,45 @@
+Python support
+==============
+
+Caliper provides Python bindings based on `pybind11 <https://pybind11.readthedocs.io/en/stable/>`_
+for the annotation and :code:`ConfigManager` APIs. To build Caliper with Python support, enable
+the :code:`WITH_PYTHON_BINDINGS` option in the CMake configuration:
+
+.. code-block:: sh
+
+    $ cmake -DWITH_PYTHON_BINDINGS=On ..
+
+Using the Python module
+-----------------------
+
+The Python module requires pybind11 and an installation of Python that both supports
+pybind11 and provides development headers (e.g., :code:`Python.h`) and libraries
+(e.g., :code:`libpython3.8.so`).
+
+The Caliper Python module is installed in :code:`lib/pythonX.Y/site-packages/` and/or
+:code:`lib64/pythonX.Y/site-packages/` in the Caliper installation directory. In these paths,
+:code:`X.Y` corresponds to the major and minor version numbers of the Python installation used.
+Whether :code:`lib/` or :code:`lib64/` is used depends on the configuration of that Python
+installation. To better understand the rules for where Python modules are installed,
+see `this thread <https://discuss.python.org/t/understanding-site-packages-directories/12959>`_
+on the Python Software Foundation's discussion forum.
+
+To use the Caliper Python module, simply add the directories above to :code:`PYTHONPATH` or
+:code:`sys.path`. Note that the module will be automatically added to :code:`PYTHONPATH` when
+loading the Caliper package with Spack if the :code:`python` variant is enabled.
+The module can then be imported with :code:`import pycaliper`.
+
+Caliper Python API
+------------------
+
+The Caliper Python API supports a significant subset of the C and C++ annotation APIs.
+The simplest options are the :code:`pycaliper.begin_region()` and :code:`pycaliper.end_region()`
+functions. Caliper's Python API also provides the :code:`pycaliper.annotate_function` decorator
+as a higher-level way of annotating functions.
+
+The Python API also supports the Caliper :code:`ConfigManager` API (:doc:`ConfigManagerAPI`).
+The example in :code:`examples/apps/py-example.py` demonstrates the annotation and
+:code:`ConfigManager` APIs for Python:
+
+.. literalinclude:: ../../examples/apps/py-example.py
+   :language: Python
\ No newline at end of file
diff --git a/doc/sphinx/build.rst b/doc/sphinx/build.rst
index 5c8d34e1..d3179afc 100644
--- a/doc/sphinx/build.rst
+++ b/doc/sphinx/build.rst
@@ -45,6 +45,9 @@ WITH_CUPTI
 WITH_FORTRAN
   Build the Fortran wrappers.
 
+WITH_PYTHON_BINDINGS
+  Build the Python bindings.
+
 WITH_GOTCHA
   Enable Gotcha support. Allows pthread, IO, and malloc/free tracking, and
   enables dynamic wrapping of MPI functions.
@@ -75,6 +78,9 @@ WITH_OMPT
 WITH_PAPI
   Enable PAPI support. Set PAPI installation dir in PAPI_PREFIX.
 
+WITH_PAPI_RDPMC
+  Declare that PAPI is built to use :code:`rdpmc` by default for reading counters. Unlike most options, this one is on by default; it does nothing if PAPI support is not enabled.
+
 WITH_ROCTX
   Build adapters to forward Caliper annotations to AMD's roctx annotation API.
 
@@ -92,6 +98,10 @@ WITH_VTUNE
   Build adapters to forward Caliper annotations to Intel's VTune annotation API.
   Set Intel ITT API installation dir in ``ITT_PREFIX``.
 
+WITH_ARCH
+  Specify the archspec name of the CPU architecture you are building for (e.g., ``sapphirerapids``)
+  to enable architecture-specific functionality (e.g., topdown calculations).
+
 All options are off by default. On Linux, Gotcha is enabled by default.
 
 Linking Caliper programs
@@ -144,116 +154,126 @@ Feature and build option overview
 The following table shows the features, recipes, and services that are enabled
 with the given Caliper and spack build options.
 
-+----------------+---------------+---------------------------+--------------------+
-| CMake option   | Spack option  | Enabled features/recipes  | Enabled services   |
-+================+===============+===========================+====================+
-| WITH_ADIAK     | +adiak        | Import adiak metadata in  | adiak_import,      |
-|                |               | most config recipes       | adiak_export       |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_MPI       | +mpi          | - mpi-report recipe       | mpi, mpireport     |
-|                |               | - profile.mpi,            |                    |
-|                |               |   mpi.message.count,      |                    |
-|                |               |   mpi.message.size        |                    |
-|                |               |   recipe options          |                    |
-|                |               | - Cross-process           |                    |
-|                |               |   aggregation             |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_PAPI      | +papi         | - topdown.all,            | papi, topdown      |
-|                |               |   topdown.toplevel,       |                    |
-|                |               |   topdown-counters.*      |                    |
-|                |               |   recipe options for some |                    |
-|                |               |   x86 systems             |                    |
-|                |               | - PAPI counter collection |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_LIBDW     | +libdw        | - source.module,          | symbollookup       |
-|                |               |   source.function,        |                    |
-|                |               |   source.location         |                    |
-|                |               |   recipe options          |                    |
-|                |               | - Symbol name lookup      |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_LIBPFM    | +libpfm       | PerfEvent counter         | libpfm             |
-|                |               | collection and precise    |                    |
-|                |               | event sampling            |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_LIBUNWIND | +libunwind    | - callpath option for     | callpath           |
-|                |               |   sample-report and       |                    |
-|                |               |   event-trace recipes     |                    |
-|                |               |   (requires libdw)        |                    |
-|                |               | - Call stack unwinding    |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_SAMPLER   | +sampler      | - sample-report,          | sampler            |
-|                |               |   hatchet-sample-profile  |                    |
-|                |               |   recipes                 |                    |
-|                |               | - sampling option for     |                    |
-|                |               |   event-trace recipe      |                    |
-|                |               | - Linux sampling support  |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_CUPTI     | +cuda         | - cuda-activity-report    | cupti, cuptitrace  |
-|                |               |   cuda-activity-profile   |                    |
-|                |               |   recipes                 |                    |
-|                |               | - profile.cuda,           |                    |
-|                |               |   cuda.gputime,           |                    |
-|                |               |   cuda.memcpy recipe      |                    |
-|                |               |   options                 |                    |
-|                |               | - CUDA API profiling      |                    |
-|                |               | - CUDA activity tracing   |                    |
-+----------------+               +---------------------------+--------------------+
-| WITH_NVTX      |               | - nvtx recipe             | nvtx               |
-|                |               | - Caliper-to-NVTX region  |                    |
-|                |               |   forwarding              |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_ROCTRACER | +rocm         | - rocm-activity-report,   | roctracer          |
-|                |               |   rocm-activity-profile   |                    |
-|                |               |   recipes                 |                    |
-|                |               | - profile.hip             |                    |
-|                |               |   rocm.gputime,           |                    |
-|                |               |   rocm.memcpy recipe      |                    |
-|                |               |   options                 |                    |
-|                |               | - ROCm/HIP API profiling  |                    |
-|                |               | - ROCm activity tracing   |                    |
-+----------------+               +---------------------------+--------------------+
-| WITH_ROCTX     |               | - roctx recipe            | roctx              |
-|                |               | - Caliper-to-ROCTX region |                    |
-|                |               |   forwarding              |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_OMPT      | not available | - openmp-report recipe    | ompt               |
-|                | yet           | - openmp.times,           |                    |
-|                |               |   openmp.threads,         |                    |
-|                |               |   openmp.efficiency       |                    |
-|                |               |   recipe options          |                    |
-|                |               | - OpenMP tools interface  |                    |
-|                |               |   support (CPU only, no   |                    |
-|                |               |   target offload)         |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_GOTCHA    | +gotcha       | - io.bytes.*,             | io, pthread,       |
-|                |               |   io.*.bandwidth,         | sysalloc           |
-|                |               |   mem.highwatermark,      |                    |
-|                |               |   main_thread_only        |                    |
-|                |               |   recipe options          |                    |
-|                |               | - Use Gotcha for MPI      |                    |
-|                |               |   MPI function wrapping   |                    |
-|                |               |   instead of PMPI         |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_UMPIRE    | not available | umpire.totals,            | umpire             |
-|                | yet           | umpire.allocators options |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_VARIORUM  | +variorum     | Read variorum counters    | variorum           |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_PCP       | not available | - mem.*.bandwidth,        | pcp, pcp.memory    |
-|                | yet           |   mem.*.bytes recipe      |                    |
-|                |               |   options on some LLNL    |                    |
-|                |               |   LC systems              |                    |
-|                |               | - Read Performance        |                    |
-|                |               |   CoPilot counters        |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_VTUNE     | not available | Intel ITT API annotation  | vtune              |
-|                | yet           | forwarding                |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_CRAYPAT   | not available | HPE CrayPAT API           | craypat            |
-|                | yet           | annotation forwarding     |                    |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_KOKKOS    | +kokkos       | Enable Kokkos tool API    | kokkostime,        |
-|                |               | bindings                  | kokkoslookup       |
-+----------------+---------------+---------------------------+--------------------+
-| WITH_FORTRAN   | +fortran      | Enable Fortran API        |                    |
-+----------------+---------------+---------------------------+--------------------+
++----------------------+---------------+---------------+---------------------------+--------------------+
+| CMake option         | Default value | Spack option  | Enabled features/recipes  | Enabled services   |
++======================+===============+===============+===========================+====================+
+| WITH_ADIAK           | False         | +adiak        | Import adiak metadata in  | adiak_import,      |
+|                      |               |               | most config recipes       | adiak_export       |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_MPI             | False         | +mpi          | - mpi-report recipe       | mpi, mpireport     |
+|                      |               |               | - profile.mpi,            |                    |
+|                      |               |               |   mpi.message.count,      |                    |
+|                      |               |               |   mpi.message.size        |                    |
+|                      |               |               |   recipe options          |                    |
+|                      |               |               | - Cross-process           |                    |
+|                      |               |               |   aggregation             |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_PAPI            | False         | +papi         | - topdown.all,            | papi, topdown      |
+|                      |               |               |   topdown.toplevel,       |                    |
+|                      |               |               |   topdown-counters.*      |                    |
+|                      |               |               |   recipe options for some |                    |
+|                      |               |               |   x86 systems             |                    |
+|                      |               |               | - PAPI counter collection |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_PAPI_RDPMC      | True          | not available | Topdown calculations      |                    |
+|                      |               | yet           | based on different        |                    |
+|                      |               |               | approaches to reading     |                    |
+|                      |               |               | counters in PAPI          |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_LIBDW           | False         | +libdw        | - source.module,          | symbollookup       |
+|                      |               |               |   source.function,        |                    |
+|                      |               |               |   source.location         |                    |
+|                      |               |               |   recipe options          |                    |
+|                      |               |               | - Symbol name lookup      |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_LIBPFM          | False         | +libpfm       | PerfEvent counter         | libpfm             |
+|                      |               |               | collection and precise    |                    |
+|                      |               |               | event sampling            |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_LIBUNWIND       | False         | +libunwind    | - callpath option for     | callpath           |
+|                      |               |               |   sample-report and       |                    |
+|                      |               |               |   event-trace recipes     |                    |
+|                      |               |               |   (requires libdw)        |                    |
+|                      |               |               | - Call stack unwinding    |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_SAMPLER         | False         | +sampler      | - sample-report,          | sampler            |
+|                      |               |               |   hatchet-sample-profile  |                    |
+|                      |               |               |   recipes                 |                    |
+|                      |               |               | - sampling option for     |                    |
+|                      |               |               |   event-trace recipe      |                    |
+|                      |               |               | - Linux sampling support  |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_CUPTI           | False         | +cuda         | - cuda-activity-report    | cupti, cuptitrace  |
+|                      |               |               |   cuda-activity-profile   |                    |
+|                      |               |               |   recipes                 |                    |
+|                      |               |               | - profile.cuda,           |                    |
+|                      |               |               |   cuda.gputime,           |                    |
+|                      |               |               |   cuda.memcpy recipe      |                    |
+|                      |               |               |   options                 |                    |
+|                      |               |               | - CUDA API profiling      |                    |
+|                      |               |               | - CUDA activity tracing   |                    |
++----------------------+---------------+               +---------------------------+--------------------+
+| WITH_NVTX            | False         |               | - nvtx recipe             | nvtx               |
+|                      |               |               | - Caliper-to-NVTX region  |                    |
+|                      |               |               |   forwarding              |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_ROCTRACER       | False         | +rocm         | - rocm-activity-report,   | roctracer          |
+|                      |               |               |   rocm-activity-profile   |                    |
+|                      |               |               |   recipes                 |                    |
+|                      |               |               | - profile.hip             |                    |
+|                      |               |               |   rocm.gputime,           |                    |
+|                      |               |               |   rocm.memcpy recipe      |                    |
+|                      |               |               |   options                 |                    |
+|                      |               |               | - ROCm/HIP API profiling  |                    |
+|                      |               |               | - ROCm activity tracing   |                    |
++----------------------+---------------+               +---------------------------+--------------------+
+| WITH_ROCTX           | False         |               | - roctx recipe            | roctx              |
+|                      |               |               | - Caliper-to-ROCTX region |                    |
+|                      |               |               |   forwarding              |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_OMPT            | False         | not available | - openmp-report recipe    | ompt               |
+|                      |               | yet           | - openmp.times,           |                    |
+|                      |               |               |   openmp.threads,         |                    |
+|                      |               |               |   openmp.efficiency       |                    |
+|                      |               |               |   recipe options          |                    |
+|                      |               |               | - OpenMP tools interface  |                    |
+|                      |               |               |   support (CPU only, no   |                    |
+|                      |               |               |   target offload)         |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_GOTCHA          | True on       | +gotcha       | - io.bytes.*,             | io, pthread,       |
+|                      | Linux;        |               |   io.*.bandwidth,         | sysalloc           |
+|                      | False         |               |   mem.highwatermark,      |                    |
+|                      | otherwise     |               |   main_thread_only        |                    |
+|                      |               |               |   recipe options          |                    |
+|                      |               |               | - Use Gotcha for MPI      |                    |
+|                      |               |               |   function wrapping       |                    |
+|                      |               |               |   instead of PMPI         |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_UMPIRE          | False         | not available | umpire.totals,            | umpire             |
+|                      |               | yet           | umpire.allocators options |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_VARIORUM        | False         | +variorum     | Read variorum counters    | variorum           |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_PCP             | False         | not available | - mem.*.bandwidth,        | pcp, pcp.memory    |
+|                      |               | yet           |   mem.*.bytes recipe      |                    |
+|                      |               |               |   options on some LLNL    |                    |
+|                      |               |               |   LC systems              |                    |
+|                      |               |               | - Read Performance        |                    |
+|                      |               |               |   CoPilot counters        |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_VTUNE           | False         | not available | Intel ITT API annotation  | vtune              |
+|                      |               | yet           | forwarding                |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_CRAYPAT         | False         | not available | HPE CrayPAT API           | craypat            |
+|                      |               | yet           | annotation forwarding     |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_KOKKOS          | True          | +kokkos       | Enable Kokkos tool API    | kokkostime,        |
+|                      |               |               | bindings                  | kokkoslookup       |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_FORTRAN         | False         | +fortran      | Enable Fortran API        |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_PYTHON_BINDINGS | False         | +python       | Enable Python API         |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
+| WITH_ARCH            | No default    | not available | Enable microarchitecture- |                    |
+|                      |               | yet           | specific features         |                    |
++----------------------+---------------+---------------+---------------------------+--------------------+
diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst
index acf1dd16..692564f8 100644
--- a/doc/sphinx/index.rst
+++ b/doc/sphinx/index.rst
@@ -66,6 +66,7 @@ This section lists how-to articles for various use cases.
    SampleProfiling
    ThirdPartyTools
    FortranSupport
+   PythonSupport
 
 Reference documentation
 -------------------------------
diff --git a/src/services/topdown/HaswellTopdown.cpp b/src/services/topdown/HaswellTopdown.cpp
index f149a6c5..f57acd04 100644
--- a/src/services/topdown/HaswellTopdown.cpp
+++ b/src/services/topdown/HaswellTopdown.cpp
@@ -2,249 +2,231 @@
 
 #include <algorithm>
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
 HaswellTopdown::HaswellTopdown(IntelTopdownLevel level)
     : cali::topdown::TopdownCalculator(
-          level,
-          // top_counters
-          "CPU_CLK_THREAD_UNHALTED:THREAD_P"
-          ",IDQ_UOPS_NOT_DELIVERED:CORE"
-          ",INT_MISC:RECOVERY_CYCLES"
-          ",UOPS_ISSUED:ANY"
-          ",UOPS_RETIRED:RETIRE_SLOTS",
-          // all_counters
-          "BR_MISP_RETIRED:ALL_BRANCHES"
-          ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
-          ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
-          ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
-          ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
-          ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
-          ",IDQ_UOPS_NOT_DELIVERED:CORE"
-          ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
-          ",INT_MISC:RECOVERY_CYCLES"
-          ",MACHINE_CLEARS:COUNT"
-          ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
-          ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
-          ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
-          ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
-          ",UOPS_ISSUED:ANY"
-          ",UOPS_RETIRED:RETIRE_SLOTS",
-          // res_top
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
-          // res_all
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
-           "branch_mispredict", "machine_clears", "frontend_latency",
-           "frontend_bandwidth", "memory_bound", "core_bound", "ext_mem_bound",
-           "l1_bound", "l2_bound", "l3_bound"}) {}
-
-bool HaswellTopdown::check_for_disabled_multiplex() const { return false; }
-
-std::vector<Entry>
-HaswellTopdown::compute_toplevel(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  Variant v_cpu_clk_unhalted_thread_p =
-      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-  Variant v_uops_retired_retire_slots =
-      get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
-  Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
-  Variant v_int_misc_recovery_cycles =
-      get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
-  Variant v_idq_uops_not_delivered_core =
-      get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
-
-  bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() ||
-                       v_uops_retired_retire_slots.empty() ||
-                       v_uops_issued_any.empty() ||
-                       v_int_misc_recovery_cycles.empty() ||
-                       v_idq_uops_not_delivered_core.empty();
-  bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 &&
-                    v_uops_retired_retire_slots.to_double() > 0.0 &&
-                    v_uops_issued_any.to_double() > 0.0 &&
-                    v_int_misc_recovery_cycles.to_double() > 0.0 &&
-                    v_idq_uops_not_delivered_core.to_double() > 0.0;
-
-  double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double();
-
-  if (is_incomplete || !is_nonzero || slots < 1.0)
+        level,
+        // top_counters
+        "CPU_CLK_THREAD_UNHALTED:THREAD_P"
+        ",IDQ_UOPS_NOT_DELIVERED:CORE"
+        ",INT_MISC:RECOVERY_CYCLES"
+        ",UOPS_ISSUED:ANY"
+        ",UOPS_RETIRED:RETIRE_SLOTS",
+        // all_counters
+        "BR_MISP_RETIRED:ALL_BRANCHES"
+        ",CPU_CLK_THREAD_UNHALTED:THREAD_P"
+        ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"
+        ",CYCLE_ACTIVITY:STALLS_L1D_PENDING"
+        ",CYCLE_ACTIVITY:STALLS_L2_PENDING"
+        ",CYCLE_ACTIVITY:STALLS_LDM_PENDING"
+        ",IDQ_UOPS_NOT_DELIVERED:CORE"
+        ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"
+        ",INT_MISC:RECOVERY_CYCLES"
+        ",MACHINE_CLEARS:COUNT"
+        ",MEM_LOAD_UOPS_RETIRED:L3_HIT"
+        ",MEM_LOAD_UOPS_RETIRED:L3_MISS"
+        ",UOPS_EXECUTED:CORE_CYCLES_GE_1"
+        ",UOPS_EXECUTED:CORE_CYCLES_GE_2"
+        ",UOPS_ISSUED:ANY"
+        ",UOPS_RETIRED:RETIRE_SLOTS",
+        // res_top
+        { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+        // res_all
+        { "retiring",
+          "backend_bound",
+          "frontend_bound",
+          "bad_speculation",
+          "branch_mispredict",
+          "machine_clears",
+          "frontend_latency",
+          "frontend_bandwidth",
+          "memory_bound",
+          "core_bound",
+          "ext_mem_bound",
+          "l1_bound",
+          "l2_bound",
+          "l3_bound" }
+    )
+{}
+
+bool HaswellTopdown::check_for_disabled_multiplex() const
+{
+    return false;
+}
+
+std::vector<Entry> HaswellTopdown::compute_toplevel(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_cpu_clk_unhalted_thread_p   = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+    Variant v_uops_retired_retire_slots   = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS");
+    Variant v_uops_issued_any             = get_val_from_rec(rec, "UOPS_ISSUED:ANY");
+    Variant v_int_misc_recovery_cycles    = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES");
+    Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE");
+
+    bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_uops_retired_retire_slots.empty()
+                         || v_uops_issued_any.empty() || v_int_misc_recovery_cycles.empty()
+                         || v_idq_uops_not_delivered_core.empty();
+    bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 && v_uops_retired_retire_slots.to_double() > 0.0
+                      && v_uops_issued_any.to_double() > 0.0 && v_int_misc_recovery_cycles.to_double() > 0.0
+                      && v_idq_uops_not_delivered_core.to_double() > 0.0;
+
+    double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double();
+
+    if (is_incomplete || !is_nonzero || slots < 1.0)
+        return ret;
+
+    double retiring        = v_uops_retired_retire_slots.to_double() / slots;
+    double bad_speculation = (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double()
+                              + 4.0 * v_int_misc_recovery_cycles.to_double())
+                             / slots;
+    double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots;
+    double backend_bound  = 1.0 - (retiring + bad_speculation + frontend_bound);
+
+    ret.reserve(4);
+    ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+    ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
+
     return ret;
+}
 
-  double retiring = v_uops_retired_retire_slots.to_double() / slots;
-  double bad_speculation =
-      (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double() +
-       4.0 * v_int_misc_recovery_cycles.to_double()) /
-      slots;
-  double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots;
-  double backend_bound = 1.0 - (retiring + bad_speculation + frontend_bound);
-
-  ret.reserve(4);
-  ret.push_back(
-      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-  ret.push_back(Entry(m_result_attrs["backend_bound"],
-                      Variant(std::max(backend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["frontend_bound"],
-                      Variant(std::max(frontend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["bad_speculation"],
-                      Variant(std::max(bad_speculation, 0.0))));
-
-  return ret;
+std::size_t HaswellTopdown::get_num_expected_toplevel() const
+{
+    return 4;
 }
 
-std::size_t HaswellTopdown::get_num_expected_toplevel() const { return 4; }
+std::vector<Entry> HaswellTopdown::compute_retiring(const std::vector<Entry>& rec)
+{
+    return {};
+}
 
-std::vector<Entry>
-HaswellTopdown::compute_retiring(const std::vector<Entry> &rec) {
-  return {};
+std::size_t HaswellTopdown::get_num_expected_retiring() const
+{
+    return 0;
 }
 
-std::size_t HaswellTopdown::get_num_expected_retiring() const { return 0; }
-
-std::vector<Entry>
-HaswellTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  Variant v_cpu_clk_unhalted_thread_p =
-      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-  Variant v_cycle_activity_stalls_ldm_pending =
-      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING");
-  Variant v_cycle_activity_cycles_no_execute =
-      get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE");
-  Variant v_uops_executed_core_cycles_ge_1 =
-      get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1");
-  Variant v_uops_executed_core_cycles_ge_2 =
-      get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2");
-  Variant v_mem_load_uops_retired_l3_miss =
-      get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS");
-  Variant v_mem_load_uops_retired_l3_hit =
-      get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT");
-  Variant v_cycle_activity_stalls_l2_pending =
-      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING");
-  Variant v_cycle_activity_stalls_l1d_pending =
-      get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING");
-
-  bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() ||
-                       v_cycle_activity_stalls_ldm_pending.empty() ||
-                       v_cycle_activity_cycles_no_execute.empty() ||
-                       v_uops_executed_core_cycles_ge_1.empty() ||
-                       v_uops_executed_core_cycles_ge_2.empty() ||
-                       v_mem_load_uops_retired_l3_miss.empty() ||
-                       v_mem_load_uops_retired_l3_hit.empty() ||
-                       v_cycle_activity_stalls_l2_pending.empty() ||
-                       v_cycle_activity_stalls_l1d_pending.empty();
-
-  double clocks = v_cpu_clk_unhalted_thread_p.to_double();
-
-  if (is_incomplete || !(clocks > 1.0))
-    return ret;
+std::vector<Entry> HaswellTopdown::compute_backend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    Variant v_cpu_clk_unhalted_thread_p         = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+    Variant v_cycle_activity_stalls_ldm_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING");
+    Variant v_cycle_activity_cycles_no_execute  = get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE");
+    Variant v_uops_executed_core_cycles_ge_1    = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1");
+    Variant v_uops_executed_core_cycles_ge_2    = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2");
+    Variant v_mem_load_uops_retired_l3_miss     = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS");
+    Variant v_mem_load_uops_retired_l3_hit      = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT");
+    Variant v_cycle_activity_stalls_l2_pending  = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING");
+    Variant v_cycle_activity_stalls_l1d_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING");
+
+    bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_cycle_activity_stalls_ldm_pending.empty()
+                         || v_cycle_activity_cycles_no_execute.empty() || v_uops_executed_core_cycles_ge_1.empty()
+                         || v_uops_executed_core_cycles_ge_2.empty() || v_mem_load_uops_retired_l3_miss.empty()
+                         || v_mem_load_uops_retired_l3_hit.empty() || v_cycle_activity_stalls_l2_pending.empty()
+                         || v_cycle_activity_stalls_l1d_pending.empty();
+
+    double clocks = v_cpu_clk_unhalted_thread_p.to_double();
+
+    if (is_incomplete || !(clocks > 1.0))
+        return ret;
+
+    double memory_bound = v_cycle_activity_stalls_ldm_pending.to_double() / clocks;
+    double be_bound_at_exe =
+        (v_cycle_activity_cycles_no_execute.to_double() + v_uops_executed_core_cycles_ge_1.to_double()
+         - v_uops_executed_core_cycles_ge_2.to_double())
+        / clocks;
+    double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() + 7.0 * v_mem_load_uops_retired_l3_miss.to_double();
+    double l3_hit_fraction  = 0.0;
+    double l3_miss_fraction = 0.0;
+    if (l3_tot > 0.0) {
+        l3_hit_fraction  = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot;
+        l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot;
+    }
+    double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_miss_fraction / clocks;
+    double l1_bound =
+        (v_cycle_activity_stalls_ldm_pending.to_double() - v_cycle_activity_stalls_l1d_pending.to_double()) / clocks;
+    double l2_bound =
+        (v_cycle_activity_stalls_l1d_pending.to_double() - v_cycle_activity_stalls_l2_pending.to_double()) / clocks;
+    double l3_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks;
+
+    ret.reserve(6);
+    ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound)));
+    ret.push_back(Entry(m_result_attrs["core_bound"], Variant(be_bound_at_exe - memory_bound)));
+    ret.push_back(Entry(m_result_attrs["ext_mem_bound"], Variant(ext_mem_bound)));
+    ret.push_back(Entry(m_result_attrs["l1_bound"], Variant(l1_bound)));
+    ret.push_back(Entry(m_result_attrs["l2_bound"], Variant(l2_bound)));
+    ret.push_back(Entry(m_result_attrs["l3_bound"], Variant(l3_bound)));
 
-  double memory_bound =
-      v_cycle_activity_stalls_ldm_pending.to_double() / clocks;
-  double be_bound_at_exe = (v_cycle_activity_cycles_no_execute.to_double() +
-                            v_uops_executed_core_cycles_ge_1.to_double() -
-                            v_uops_executed_core_cycles_ge_2.to_double()) /
-                           clocks;
-  double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() +
-                  7.0 * v_mem_load_uops_retired_l3_miss.to_double();
-  double l3_hit_fraction = 0.0;
-  double l3_miss_fraction = 0.0;
-  if (l3_tot > 0.0) {
-    l3_hit_fraction = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot;
-    l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot;
-  }
-  double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() *
-                         l3_miss_fraction / clocks;
-  double l1_bound = (v_cycle_activity_stalls_ldm_pending.to_double() -
-                     v_cycle_activity_stalls_l1d_pending.to_double()) /
-                    clocks;
-  double l2_bound = (v_cycle_activity_stalls_l1d_pending.to_double() -
-                     v_cycle_activity_stalls_l2_pending.to_double()) /
-                    clocks;
-  double l3_bound =
-      v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks;
-
-  ret.reserve(6);
-  ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound)));
-  ret.push_back(Entry(m_result_attrs["core_bound"],
-                      Variant(be_bound_at_exe - memory_bound)));
-  ret.push_back(Entry(m_result_attrs["ext_mem_bound"], Variant(ext_mem_bound)));
-  ret.push_back(Entry(m_result_attrs["l1_bound"], Variant(l1_bound)));
-  ret.push_back(Entry(m_result_attrs["l2_bound"], Variant(l2_bound)));
-  ret.push_back(Entry(m_result_attrs["l3_bound"], Variant(l3_bound)));
-
-  return ret;
+    return ret;
 }
 
-std::size_t HaswellTopdown::get_num_expected_backend_bound() const { return 6; }
+std::size_t HaswellTopdown::get_num_expected_backend_bound() const
+{
+    return 6;
+}
 
-std::vector<Entry>
-HaswellTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> HaswellTopdown::compute_frontend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  Variant v_cpu_clk_unhalted_thread_p =
-      get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
-  Variant v_idq_uops_not_delivered =
-      get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE");
+    Variant v_cpu_clk_unhalted_thread_p = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P");
+    Variant v_idq_uops_not_delivered    = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE");
 
-  bool is_incomplete =
-      v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty();
+    bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty();
 
-  double clocks = v_cpu_clk_unhalted_thread_p.to_double();
-  double uops = v_idq_uops_not_delivered.to_double();
+    double clocks = v_cpu_clk_unhalted_thread_p.to_double();
+    double uops   = v_idq_uops_not_delivered.to_double();
 
-  if (is_incomplete || clocks < 1.0 || uops > clocks)
-    return ret;
+    if (is_incomplete || clocks < 1.0 || uops > clocks)
+        return ret;
 
-  double fe_latency = uops / clocks;
+    double fe_latency = uops / clocks;
 
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(fe_latency)));
-  ret.push_back(
-      Entry(m_result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency)));
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(fe_latency)));
+    ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency)));
 
-  return ret;
+    return ret;
 }
 
-std::size_t HaswellTopdown::get_num_expected_frontend_bound() const {
-  return 2;
+std::size_t HaswellTopdown::get_num_expected_frontend_bound() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-HaswellTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> HaswellTopdown::compute_bad_speculation(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  Variant v_br_misp_retired_all_branches =
-      get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES");
-  Variant v_machine_clears_count =
-      get_val_from_rec(rec, "MACHINE_CLEARS:COUNT");
+    Variant v_br_misp_retired_all_branches = get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES");
+    Variant v_machine_clears_count         = get_val_from_rec(rec, "MACHINE_CLEARS:COUNT");
 
-  bool is_incomplete =
-      v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty();
+    bool is_incomplete = v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty();
 
-  double br_misp_retired_all_branches =
-      v_br_misp_retired_all_branches.to_double();
-  double machine_clears_count = v_machine_clears_count.to_double();
+    double br_misp_retired_all_branches = v_br_misp_retired_all_branches.to_double();
+    double machine_clears_count         = v_machine_clears_count.to_double();
 
-  if (is_incomplete ||
-      !(br_misp_retired_all_branches + machine_clears_count > 1.0))
-    return ret;
+    if (is_incomplete || !(br_misp_retired_all_branches + machine_clears_count > 1.0))
+        return ret;
 
-  double branch_mispredict =
-      br_misp_retired_all_branches /
-      (br_misp_retired_all_branches + machine_clears_count);
+    double branch_mispredict = br_misp_retired_all_branches / (br_misp_retired_all_branches + machine_clears_count);
 
-  ret.reserve(2);
-  ret.push_back(
-      Entry(m_result_attrs["branch_mispredict"], Variant(branch_mispredict)));
-  ret.push_back(Entry(m_result_attrs["machine_clears"],
-                      Variant(1.0 - branch_mispredict)));
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(branch_mispredict)));
+    ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(1.0 - branch_mispredict)));
 
-  return ret;
+    return ret;
 }
 
-std::size_t HaswellTopdown::get_num_expected_bad_speculation() const {
-  return 2;
+std::size_t HaswellTopdown::get_num_expected_bad_speculation() const
+{
+    return 2;
 }
 
 } // namespace topdown
diff --git a/src/services/topdown/HaswellTopdown.h b/src/services/topdown/HaswellTopdown.h
index 5ca0a9be..01c99ebc 100644
--- a/src/services/topdown/HaswellTopdown.h
+++ b/src/services/topdown/HaswellTopdown.h
@@ -3,41 +3,40 @@
 
 #include "TopdownCalculator.h"
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
-class HaswellTopdown : public TopdownCalculator {
+class HaswellTopdown : public TopdownCalculator
+{
 public:
-  HaswellTopdown(IntelTopdownLevel level);
 
-  virtual ~HaswellTopdown() = default;
+    HaswellTopdown(IntelTopdownLevel level);
 
-  virtual bool check_for_disabled_multiplex() const override;
+    virtual ~HaswellTopdown() = default;
 
-  virtual std::vector<Entry>
-  compute_toplevel(const std::vector<Entry> &rec) override;
+    virtual bool check_for_disabled_multiplex() const override;
 
-  virtual std::size_t get_num_expected_toplevel() const override;
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_retiring(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_toplevel() const override;
 
-  virtual std::size_t get_num_expected_retiring() const override;
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_backend_bound(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_retiring() const override;
 
-  virtual std::size_t get_num_expected_backend_bound() const override;
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_frontend_bound(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_backend_bound() const override;
 
-  virtual std::size_t get_num_expected_frontend_bound() const override;
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_bad_speculation(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_frontend_bound() const override;
 
-  virtual std::size_t get_num_expected_bad_speculation() const override;
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override;
+
+    virtual std::size_t get_num_expected_bad_speculation() const override;
 };
 
 } // namespace topdown
diff --git a/src/services/topdown/SapphireRapidsTopdown.h b/src/services/topdown/SapphireRapidsTopdown.h
index 8fc75282..bdba3bd8 100644
--- a/src/services/topdown/SapphireRapidsTopdown.h
+++ b/src/services/topdown/SapphireRapidsTopdown.h
@@ -3,41 +3,40 @@
 
 #include "TopdownCalculator.h"
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
-class SapphireRapidsTopdown : public TopdownCalculator {
+class SapphireRapidsTopdown : public TopdownCalculator
+{
 public:
-  SapphireRapidsTopdown(IntelTopdownLevel level);
 
-  virtual ~SapphireRapidsTopdown() = default;
+    SapphireRapidsTopdown(IntelTopdownLevel level);
 
-  virtual bool check_for_disabled_multiplex() const override;
+    virtual ~SapphireRapidsTopdown() = default;
 
-  virtual std::vector<Entry>
-  compute_toplevel(const std::vector<Entry> &rec) override;
+    virtual bool check_for_disabled_multiplex() const override;
 
-  virtual std::size_t get_num_expected_toplevel() const override;
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_retiring(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_toplevel() const override;
 
-  virtual std::size_t get_num_expected_retiring() const override;
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_backend_bound(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_retiring() const override;
 
-  virtual std::size_t get_num_expected_backend_bound() const override;
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_frontend_bound(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_backend_bound() const override;
 
-  virtual std::size_t get_num_expected_frontend_bound() const override;
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) override;
 
-  virtual std::vector<Entry>
-  compute_bad_speculation(const std::vector<Entry> &rec) override;
+    virtual std::size_t get_num_expected_frontend_bound() const override;
 
-  virtual std::size_t get_num_expected_bad_speculation() const override;
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) override;
+
+    virtual std::size_t get_num_expected_bad_speculation() const override;
 };
 
 } // namespace topdown
diff --git a/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp b/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp
index a7e55bcf..79df5dbc 100644
--- a/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp
+++ b/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp
@@ -12,233 +12,222 @@
 #define FETCH_LAT_OFFSET 6
 #define MEM_BOUND_OFFSET 7
 
-static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
-                                               uint64_t offset) {
-  return (double)((rdpmc_value >> (offset * 8)) & 0xff) / 0xff;
+static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value, uint64_t offset)
+{
+    return (double) ((rdpmc_value >> (offset * 8)) & 0xff) / 0xff;
 }
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
 SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
     : cali::topdown::TopdownCalculator(
-          level,
-          // top_counters
-          "perf::slots"
-          ",perf::topdown-retiring",
-          // all_counters
-          "perf::slots"
-          ",perf::topdown-retiring",
-          // res_top
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
-          // res_all
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
-           "branch_mispredict", "machine_clears", "frontend_latency",
-           "frontend_bandwidth", "memory_bound", "core_bound", "light_ops",
-           "heavy_ops"}) {}
-
-bool SapphireRapidsTopdown::check_for_disabled_multiplex() const {
-  return true;
+        level,
+        // top_counters
+        "perf::slots"
+        ",perf::topdown-retiring",
+        // all_counters
+        "perf::slots"
+        ",perf::topdown-retiring",
+        // res_top
+        { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+        // res_all
+        { "retiring",
+          "backend_bound",
+          "frontend_bound",
+          "bad_speculation",
+          "branch_mispredict",
+          "machine_clears",
+          "frontend_latency",
+          "frontend_bandwidth",
+          "memory_bound",
+          "core_bound",
+          "light_ops",
+          "heavy_ops" }
+    )
+{}
+
+bool SapphireRapidsTopdown::check_for_disabled_multiplex() const
+{
+    return true;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_tma_metrics                = get_val_from_rec(rec, "perf::topdown-retiring");
 
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
-  // Check if all Variants are greater than 0 when casted to doubles (use
-  // .to_double())
-  bool is_nonzero = v_tma_metrics.to_uint() > 0;
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
+    // Check that the raw TMA metrics value is nonzero (read as an unsigned
+    // integer via .to_uint())
+    bool is_nonzero = v_tma_metrics.to_uint() > 0;
 
-  // Check if bad values were obtained
-  if (is_incomplete || !is_nonzero)
-    return ret;
+    // Check if bad values were obtained
+    if (is_incomplete || !is_nonzero)
+        return ret;
+
+    uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+
+    double retiring        = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+    double frontend_bound  = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+    double backend_bound   = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
+    double bad_speculation = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
+
+    // Add toplevel metrics to vector of Entry
+    ret.reserve(4);
+    ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+    ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
 
-  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
-
-  double retiring =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
-  double frontend_bound =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
-  double backend_bound =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
-  double bad_speculation =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
-
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(4);
-  ret.push_back(
-      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-  ret.push_back(Entry(m_result_attrs["backend_bound"],
-                      Variant(std::max(backend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["frontend_bound"],
-                      Variant(std::max(frontend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["bad_speculation"],
-                      Variant(std::max(bad_speculation, 0.0))));
-
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const {
-  return 4;
+std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const
+{
+    return 4;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_retiring(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_tma_metrics                = get_val_from_rec(rec, "perf::topdown-retiring");
 
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+    uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double retiring =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
-  double heavy_ops =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
-  double light_ops = std::max(0.0, retiring - heavy_ops);
+    double retiring  = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+    double heavy_ops = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
+    double light_ops = std::max(0.0, retiring - heavy_ops);
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(
-      Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
-  ret.push_back(
-      Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+    // Add the retiring sub-metrics (heavy_ops, light_ops) to the vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
+    ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
 
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_tma_metrics                = get_val_from_rec(rec, "perf::topdown-retiring");
 
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+    uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double backend_bound =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
-  double memory_bound =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
-  double core_bound = std::max(0.0, backend_bound - memory_bound);
+    double backend_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
+    double memory_bound  = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
+    double core_bound    = std::max(0.0, backend_bound - memory_bound);
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["memory_bound"],
-                      Variant(std::max(memory_bound, 0.0))));
-  ret.push_back(
-      Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+    // Add the backend-bound sub-metrics (memory_bound, core_bound) to the vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
 
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_tma_metrics                = get_val_from_rec(rec, "perf::topdown-retiring");
 
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+    uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double frontend_bound =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
-  double fetch_latency =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
-  double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
+    double frontend_bound  = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+    double fetch_latency   = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
+    double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["frontend_latency"],
-                      Variant(std::max(fetch_latency, 0.0))));
-  ret.push_back(Entry(m_result_attrs["frontend_bandwidth"],
-                      Variant(std::max(fetch_bandwidth, 0.0))));
+    // Add the frontend-bound sub-metrics (frontend_latency, frontend_bandwidth) to the vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0))));
+    ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0))));
 
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
 
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_tma_metrics                = get_val_from_rec(rec, "perf::topdown-retiring");
 
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+    uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double bad_speculation =
-      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
-  double branch_mispredict = get_tma_percent_from_rdpmc_value(
-      tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
-  double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
+    double bad_speculation   = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
+    double branch_mispredict = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
+    double machine_clears    = std::max(0.0, bad_speculation - branch_mispredict);
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["branch_mispredict"],
-                      Variant(std::max(branch_mispredict, 0.0))));
-  ret.push_back(Entry(m_result_attrs["machine_clears"],
-                      Variant(std::max(machine_clears, 0.0))));
+    // Add the bad-speculation sub-metrics (branch_mispredict, machine_clears) to the vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0))));
+    ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(machine_clears, 0.0))));
 
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const
+{
+    return 2;
 }
 
 } // namespace topdown
diff --git a/src/services/topdown/SapphireRapidsTopdown_read.cpp b/src/services/topdown/SapphireRapidsTopdown_read.cpp
index 1739e144..1e480505 100644
--- a/src/services/topdown/SapphireRapidsTopdown_read.cpp
+++ b/src/services/topdown/SapphireRapidsTopdown_read.cpp
@@ -2,301 +2,280 @@
 
 #include <algorithm>
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
 SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
     : cali::topdown::TopdownCalculator(
-          level,
-          // top_counters
-          "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING",
-          // all_counters
-          "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING"
-          ",perf_raw::r8400"  // topdown-heavy-ops
-          ",perf_raw::r8500"  // topdown-br-mispredict
-          ",perf_raw::r8600"  // topdown-fetch-lat
-          ",perf_raw::r8700", // topdown-mem-bound
-          // res_top
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
-          // res_all
-          {"retiring", "backend_bound", "frontend_bound", "bad_speculation",
-           "branch_mispredict", "machine_clears", "frontend_latency",
-           "frontend_bandwidth", "memory_bound", "core_bound", "light_ops",
-           "heavy_ops"}) {}
-
-bool SapphireRapidsTopdown::check_for_disabled_multiplex() const {
-  return true;
+        level,
+        // top_counters
+        "perf::slots"
+        ",perf::topdown-retiring"
+        ",perf::topdown-bad-spec"
+        ",perf::topdown-fe-bound"
+        ",perf::topdown-be-bound"
+        ",INT_MISC:UOP_DROPPING",
+        // all_counters
+        "perf::slots"
+        ",perf::topdown-retiring"
+        ",perf::topdown-bad-spec"
+        ",perf::topdown-fe-bound"
+        ",perf::topdown-be-bound"
+        ",INT_MISC:UOP_DROPPING"
+        ",perf_raw::r8400"  // topdown-heavy-ops
+        ",perf_raw::r8500"  // topdown-br-mispredict
+        ",perf_raw::r8600"  // topdown-fetch-lat
+        ",perf_raw::r8700", // topdown-mem-bound
+        // res_top
+        { "retiring", "backend_bound", "frontend_bound", "bad_speculation" },
+        // res_all
+        { "retiring",
+          "backend_bound",
+          "frontend_bound",
+          "bad_speculation",
+          "branch_mispredict",
+          "machine_clears",
+          "frontend_latency",
+          "frontend_bandwidth",
+          "memory_bound",
+          "core_bound",
+          "light_ops",
+          "heavy_ops" }
+    )
+{}
+
+bool SapphireRapidsTopdown::check_for_disabled_multiplex() const
+{
+    return true;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_int_misc_uop_dropping.empty() ||
-                       v_slots_or_info_thread_slots.empty();
-  // Check if all Variants are greater than 0 when casted to doubles (use
-  // .to_double())
-  bool is_nonzero =
-      v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
-      v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
-      v_int_misc_uop_dropping.to_double() > 0.0 &&
-      v_slots_or_info_thread_slots.to_double() > 0.0;
-
-  // Check if bad values were obtained
-  if (is_incomplete || !is_nonzero)
-    return ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // Get PAPI metrics for toplevel calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+    Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+    Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+    Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+    Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                         || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty();
+    // Check if all Variants are greater than 0 when cast to doubles (use
+    // .to_double())
+    bool is_nonzero = v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && v_bad_spec.to_double() > 0.0
+                      && v_retiring.to_double() > 0.0 && v_int_misc_uop_dropping.to_double() > 0.0
+                      && v_slots_or_info_thread_slots.to_double() > 0.0;
+
+    // Check if bad values were obtained
+    if (is_incomplete || !is_nonzero)
+        return ret;
+
+    // Perform toplevel calcs
+    double toplevel_sum =
+        (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+
+    double retiring       = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                            - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
+    double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+    // Add toplevel metrics to vector of Entry
+    ret.reserve(4);
+    ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
+    ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0))));
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
-  double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
-
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(4);
-  ret.push_back(
-      Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0))));
-  ret.push_back(Entry(m_result_attrs["backend_bound"],
-                      Variant(std::max(backend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["frontend_bound"],
-                      Variant(std::max(frontend_bound, 0.0))));
-  ret.push_back(Entry(m_result_attrs["bad_speculation"],
-                      Variant(std::max(bad_speculation, 0.0))));
-
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const {
-  return 4;
+std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const
+{
+    return 4;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
-
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_heavy_ops.empty();
-
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_retiring(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // Get PAPI metrics for retiring calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+    Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+    Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+    Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+    Variant v_heavy_ops                  = get_val_from_rec(rec, "perf_raw::r8400");
+
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                         || v_slots_or_info_thread_slots.empty() || v_heavy_ops.empty();
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
-                     (0 * v_slots_or_info_thread_slots.to_double());
-  double light_ops = std::max(0.0, retiring - heavy_ops);
+    double toplevel_sum =
+        (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+    // Copied from compute_toplevel
+    double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(
-      Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
-  ret.push_back(
-      Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+    double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double light_ops = std::max(0.0, retiring - heavy_ops);
 
-  return ret;
+    // Add retiring sub-metrics (heavy_ops, light_ops) to vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0))));
+    ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0))));
+
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
-
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_memory_bound.empty();
-
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // Get PAPI metrics for backend bound calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+    Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+    Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+    Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+    Variant v_memory_bound               = get_val_from_rec(rec, "perf_raw::r8700");
+
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                         || v_slots_or_info_thread_slots.empty() || v_memory_bound.empty();
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
-                        (0 * v_slots_or_info_thread_slots.to_double());
-  double core_bound = std::max(0.0, backend_bound - memory_bound);
+    double toplevel_sum =
+        (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+    // Copied from compute_toplevel
+    double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["memory_bound"],
-                      Variant(std::max(memory_bound, 0.0))));
-  ret.push_back(
-      Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+    double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double core_bound   = std::max(0.0, backend_bound - memory_bound);
 
-  return ret;
+    // Add backend bound sub-metrics (memory_bound, core_bound) to vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0))));
+    ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0))));
+
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
-
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
-
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // Get PAPI metrics for frontend bound calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+    Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+    Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+    Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+    Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+    Variant v_fetch_latency              = get_val_from_rec(rec, "perf_raw::r8600");
+
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                         || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
+                         || v_fetch_latency.empty();
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
 
-  double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
-                         (v_int_misc_uop_dropping.to_double() /
-                          v_slots_or_info_thread_slots.to_double());
+    double toplevel_sum =
+        (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+    // Copied from compute_toplevel
+    double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                            - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
 
-  double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
+    double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum)
+                           - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
 
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["frontend_latency"],
-                      Variant(std::max(fetch_latency, 0.0))));
-  ret.push_back(Entry(m_result_attrs["frontend_bandwidth"],
-                      Variant(std::max(fetch_bandwidth, 0.0))));
+    double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
 
-  return ret;
+    // Add frontend bound sub-metrics (frontend_latency, frontend_bandwidth) to vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0))));
+    ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0))));
+
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const
+{
+    return 2;
 }
 
-std::vector<Entry>
-SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
-  std::vector<Entry> ret;
-
-  // Get PAPI metrics for toplevel calculations
-  Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
-
-  // Check if any Variant is empty (use .empty())
-  bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
-
-  // Check if bad values were obtained
-  if (is_incomplete)
-    return ret;
+std::vector<Entry> SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry>& rec)
+{
+    std::vector<Entry> ret;
+
+    // Get PAPI metrics for bad speculation calculations
+    Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
+    Variant v_retiring                   = get_val_from_rec(rec, "perf::topdown-retiring");
+    Variant v_bad_spec                   = get_val_from_rec(rec, "perf::topdown-bad-spec");
+    Variant v_fe_bound                   = get_val_from_rec(rec, "perf::topdown-fe-bound");
+    Variant v_be_bound                   = get_val_from_rec(rec, "perf::topdown-be-bound");
+    Variant v_int_misc_uop_dropping      = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+    Variant v_branch_mispredict          = get_val_from_rec(rec, "perf_raw::r8500");
+
+    // Check if any Variant is empty (use .empty())
+    bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty()
+                         || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty()
+                         || v_branch_mispredict.empty();
+
+    // Check if bad values were obtained
+    if (is_incomplete)
+        return ret;
+
+    // Perform toplevel calcs
+    double toplevel_sum =
+        (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double());
+
+    double retiring       = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double frontend_bound = (v_fe_bound.to_double() / toplevel_sum)
+                            - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double());
+    double backend_bound   = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+
+    double branch_mispredict =
+        (v_branch_mispredict.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double());
+    double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
+
+    // Add bad speculation sub-metrics (branch_mispredict, machine_clears) to vector of Entry
+    ret.reserve(2);
+    ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0))));
+    ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(machine_clears, 0.0))));
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
-  double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
-
-  double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
-                             (0 * v_slots_or_info_thread_slots.to_double());
-  double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
-
-  // Add toplevel metrics to vector of Entry
-  ret.reserve(2);
-  ret.push_back(Entry(m_result_attrs["branch_mispredict"],
-                      Variant(std::max(branch_mispredict, 0.0))));
-  ret.push_back(Entry(m_result_attrs["machine_clears"],
-                      Variant(std::max(machine_clears, 0.0))));
-
-  return ret;
+    return ret;
 }
 
-std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const {
-  return 2;
+std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const
+{
+    return 2;
 }
 
 } // namespace topdown
diff --git a/src/services/topdown/TopdownCalculator.cpp b/src/services/topdown/TopdownCalculator.cpp
index bbfa386f..ab5ab271 100644
--- a/src/services/topdown/TopdownCalculator.cpp
+++ b/src/services/topdown/TopdownCalculator.cpp
@@ -5,87 +5,96 @@
 
 #include <algorithm>
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
-Variant TopdownCalculator::get_val_from_rec(const std::vector<Entry> &rec,
-                                            const char *name) {
-  Variant ret;
+Variant TopdownCalculator::get_val_from_rec(const std::vector<Entry>& rec, const char* name)
+{
+    Variant ret;
 
-  auto c_it = m_counter_attrs.find(name);
-  if (c_it == m_counter_attrs.end())
-    return ret;
+    auto c_it = m_counter_attrs.find(name);
+    if (c_it == m_counter_attrs.end())
+        return ret;
 
-  cali_id_t attr_id = c_it->second.id();
+    cali_id_t attr_id = c_it->second.id();
 
-  auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry &e) {
-    return e.attribute() == attr_id;
-  });
+    auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry& e) { return e.attribute() == attr_id; });
 
-  if (it != rec.end())
-    ret = it->value();
-  else
-    ++m_counters_not_found[std::string(name)];
+    if (it != rec.end())
+        ret = it->value();
+    else
+        ++m_counters_not_found[std::string(name)];
 
-  return ret;
+    return ret;
 }
 
-TopdownCalculator::TopdownCalculator(IntelTopdownLevel level,
-                                     const char *top_counters,
-                                     const char *all_counters,
-                                     std::vector<const char *> &&res_top,
-                                     std::vector<const char *> &&res_all)
-    : m_level(level), m_top_counters(top_counters),
-      m_all_counters(all_counters), m_res_top(res_top), m_res_all(res_all) {}
-
-TopdownCalculator::TopdownCalculator(IntelTopdownLevel level)
-    : m_level(level) {}
-
-bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface &db) {
-  const char *list = (m_level == All ? m_all_counters : m_top_counters);
-  auto counters = StringConverter(list).to_stringlist();
-
-  for (const auto &s : counters) {
-    Attribute attr = db.get_attribute(std::string("sum#papi.") + s);
-
-    if (!attr)
-      attr = db.get_attribute(std::string("papi.") + s);
-    if (!attr) {
-      Log(0).stream() << "topdown: " << s << " counter attribute not found!"
-                      << std::endl;
-      return false;
+TopdownCalculator::TopdownCalculator(
+    IntelTopdownLevel          level,
+    const char*                top_counters,
+    const char*                all_counters,
+    std::vector<const char*>&& res_top,
+    std::vector<const char*>&& res_all
+)
+    : m_level(level), m_top_counters(top_counters), m_all_counters(all_counters), m_res_top(res_top), m_res_all(res_all)
+{}
+
+TopdownCalculator::TopdownCalculator(IntelTopdownLevel level) : m_level(level)
+{}
+
+bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface& db)
+{
+    const char* list     = (m_level == All ? m_all_counters : m_top_counters);
+    auto        counters = StringConverter(list).to_stringlist();
+
+    for (const auto& s : counters) {
+        Attribute attr = db.get_attribute(std::string("sum#papi.") + s);
+
+        if (!attr)
+            attr = db.get_attribute(std::string("papi.") + s);
+        if (!attr) {
+            Log(0).stream() << "topdown: " << s << " counter attribute not found!" << std::endl;
+            return false;
+        }
+
+        m_counter_attrs[s] = attr;
     }
 
-    m_counter_attrs[s] = attr;
-  }
-
-  return true;
+    return true;
 }
 
-void TopdownCalculator::make_result_attrs(CaliperMetadataAccessInterface &db) {
-  std::vector<const char *> &res = (m_level == Top ? m_res_top : m_res_all);
+void TopdownCalculator::make_result_attrs(CaliperMetadataAccessInterface& db)
+{
+    std::vector<const char*>& res = (m_level == Top ? m_res_top : m_res_all);
 
-  for (const char *s : res) {
-    m_result_attrs[std::string(s)] =
-        db.create_attribute(std::string("topdown.") + s, CALI_TYPE_DOUBLE,
-                            CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS);
-  }
+    for (const char* s : res) {
+        m_result_attrs[std::string(s)] = db.create_attribute(
+            std::string("topdown.") + s,
+            CALI_TYPE_DOUBLE,
+            CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS
+        );
+    }
 }
 
-const std::map<std::string, int> &
-TopdownCalculator::get_counters_not_found() const {
-  return m_counters_not_found;
+const std::map<std::string, int>& TopdownCalculator::get_counters_not_found() const
+{
+    return m_counters_not_found;
 }
 
-const char *TopdownCalculator::get_counters() const {
-  if (m_level == All) {
-    return m_all_counters;
-  } else {
-    return m_top_counters;
-  }
+const char* TopdownCalculator::get_counters() const
+{
+    if (m_level == All) {
+        return m_all_counters;
+    } else {
+        return m_top_counters;
+    }
 }
 
-IntelTopdownLevel TopdownCalculator::get_level() const { return m_level; }
+IntelTopdownLevel TopdownCalculator::get_level() const
+{
+    return m_level;
+}
 
 } // namespace topdown
 } // namespace cali
\ No newline at end of file
diff --git a/src/services/topdown/TopdownCalculator.h b/src/services/topdown/TopdownCalculator.h
index 9841580e..0bf29264 100644
--- a/src/services/topdown/TopdownCalculator.h
+++ b/src/services/topdown/TopdownCalculator.h
@@ -19,74 +19,99 @@
  */
 // clang-format on
 
-namespace cali {
-namespace topdown {
+namespace cali
+{
+namespace topdown
+{
 
 enum IntelTopdownLevel { All = 1, Top = 2 };
 
-class TopdownCalculator {
+class TopdownCalculator
+{
 protected:
-  IntelTopdownLevel m_level;
 
-  const char *m_top_counters;
-  const char *m_all_counters;
+    IntelTopdownLevel m_level;
 
-  std::vector<const char *> m_res_top;
-  std::vector<const char *> m_res_all;
+    const char* m_top_counters;
+    const char* m_all_counters;
 
-  std::map<std::string, Attribute> m_counter_attrs;
-  std::map<std::string, Attribute> m_result_attrs;
+    std::vector<const char*> m_res_top;
+    std::vector<const char*> m_res_all;
 
-  std::map<std::string, int> m_counters_not_found;
+    std::map<std::string, Attribute> m_counter_attrs;
+    std::map<std::string, Attribute> m_result_attrs;
 
-  Variant get_val_from_rec(const std::vector<Entry> &rec, const char *name);
+    std::map<std::string, int> m_counters_not_found;
 
-  TopdownCalculator(IntelTopdownLevel level, const char *top_counters,
-                    const char *all_counters,
-                    std::vector<const char *> &&res_top,
-                    std::vector<const char *> &&res_all);
+    Variant get_val_from_rec(const std::vector<Entry>& rec, const char* name);
+
+    TopdownCalculator(
+        IntelTopdownLevel          level,
+        const char*                top_counters,
+        const char*                all_counters,
+        std::vector<const char*>&& res_top,
+        std::vector<const char*>&& res_all
+    );
 
 public:
-  TopdownCalculator(IntelTopdownLevel level);
 
-  virtual ~TopdownCalculator() = default;
+    TopdownCalculator(IntelTopdownLevel level);
+
+    virtual ~TopdownCalculator() = default;
 
-  virtual bool check_for_disabled_multiplex() const = 0;
+    // Returns true if PAPI multiplexing cannot be used for the
+    // counters and/or architecture needed for the subclass
+    virtual bool check_for_disabled_multiplex() const = 0;
 
-  virtual std::vector<Entry>
-  compute_toplevel(const std::vector<Entry> &rec) = 0;
+    // Computes the L1 topdown metrics using the counters contained
+    // in the Caliper Entries.
+    virtual std::vector<Entry> compute_toplevel(const std::vector<Entry>& rec) = 0;
 
-  virtual std::size_t get_num_expected_toplevel() const = 0;
+    // Returns the expected size of the vector returned from
+    // compute_toplevel
+    virtual std::size_t get_num_expected_toplevel() const = 0;
 
-  virtual std::vector<Entry>
-  compute_retiring(const std::vector<Entry> &rec) = 0;
+    // Computes the topdown metrics beneath "Retiring" in the
+    // topdown hierarchy for the given architecture
+    virtual std::vector<Entry> compute_retiring(const std::vector<Entry>& rec) = 0;
 
-  virtual std::size_t get_num_expected_retiring() const = 0;
+    // Returns the expected size of the vector returned from
+    // compute_retiring
+    virtual std::size_t get_num_expected_retiring() const = 0;
 
-  virtual std::vector<Entry>
-  compute_backend_bound(const std::vector<Entry> &rec) = 0;
+    // Computes the topdown metrics beneath "Backend bound" in the
+    // topdown hierarchy for the given architecture
+    virtual std::vector<Entry> compute_backend_bound(const std::vector<Entry>& rec) = 0;
 
-  virtual std::size_t get_num_expected_backend_bound() const = 0;
+    // Returns the expected size of the vector returned from
+    // compute_backend_bound
+    virtual std::size_t get_num_expected_backend_bound() const = 0;
 
-  virtual std::vector<Entry>
-  compute_frontend_bound(const std::vector<Entry> &rec) = 0;
+    // Computes the topdown metrics beneath "Frontend bound" in the
+    // topdown hierarchy for the given architecture
+    virtual std::vector<Entry> compute_frontend_bound(const std::vector<Entry>& rec) = 0;
 
-  virtual std::size_t get_num_expected_frontend_bound() const = 0;
+    // Returns the expected size of the vector returned from
+    // compute_frontend_bound
+    virtual std::size_t get_num_expected_frontend_bound() const = 0;
 
-  virtual std::vector<Entry>
-  compute_bad_speculation(const std::vector<Entry> &rec) = 0;
+    // Computes the topdown metrics beneath "Bad speculation" in the
+    // topdown hierarchy for the given architecture
+    virtual std::vector<Entry> compute_bad_speculation(const std::vector<Entry>& rec) = 0;
 
-  virtual std::size_t get_num_expected_bad_speculation() const = 0;
+    // Returns the expected size of the vector returned from
+    // compute_bad_speculation
+    virtual std::size_t get_num_expected_bad_speculation() const = 0;
 
-  bool find_counter_attrs(CaliperMetadataAccessInterface &db);
+    bool find_counter_attrs(CaliperMetadataAccessInterface& db);
 
-  void make_result_attrs(CaliperMetadataAccessInterface &db);
+    void make_result_attrs(CaliperMetadataAccessInterface& db);
 
-  const std::map<std::string, int> &get_counters_not_found() const;
+    const std::map<std::string, int>& get_counters_not_found() const;
 
-  const char *get_counters() const;
+    const char* get_counters() const;
 
-  IntelTopdownLevel get_level() const;
+    IntelTopdownLevel get_level() const;
 };
 
 } // namespace topdown