From 5b257cd5d01321c8224b6eba162c3250ccc3639c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 14 Nov 2024 16:46:52 -0500
Subject: [PATCH 01/32] typos

---
 libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp  | 2 +-
 .../libtatum/libtatum/tatum/TimingConstraints.cpp        | 2 +-
 .../libtatum/libtatum/tatum/TimingConstraints.hpp        | 2 +-
 .../libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp  | 2 +-
 .../tatum/graph_visitors/CommonAnalysisVisitor.hpp       | 4 ++--
 .../libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp   | 4 ++--
 .../tatum/graph_walkers/ParallelLevelizedWalker.hpp      | 2 +-
 vpr/src/place/place.cpp                                  | 9 ++++-----
 vpr/src/timing/concrete_timing_info.h                    | 2 +-
 9 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp
index 85f50e9ac62..f17fba8a752 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp
@@ -16,7 +16,7 @@ namespace tatum {
 
 /** \file
  * The 'SetupAnalysis' class defines the operations needed by a GraphWalker class
- * to perform a setup (max/longest path) analysis. It satisifes and extends the GraphVisitor 
+ * to perform a setup (max/longest path) analysis. It satisfies and extends the GraphVisitor
  * concept class.
  *
  * Setup Analysis Principles
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp
index 510fadd9e51..8eeff58d50b 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp
@@ -132,7 +132,7 @@ Time TimingConstraints::setup_constraint(const DomainId src_domain, const Domain
         return iter->second;
     }
 
-    //If no capture node specific constraint was found, fallback to the domain pair constriant
+    //If no capture node specific constraint was found, fallback to the domain pair constraint
     iter = setup_constraints_.find(NodeDomainPair(src_domain, sink_domain, NodeId::INVALID()));
     if(iter != setup_constraints_.end()) {
         return iter->second;
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp
index 07288ed08ba..225ac48f7d5 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp
@@ -45,7 +45,7 @@ class TimingConstraints {
         ///\returns The source NodeId of the specified domain
         NodeId clock_domain_source_node(const DomainId id) const;
 
-        //\returns whether the specified domain id corresponds to a virtual lcock
+        //\returns whether the specified domain id corresponds to a virtual clock
         bool is_virtual_clock(const DomainId id) const;
 
         ///\returns The domain of the specified node id if it is a clock source
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp
index 70a8bbe2758..7b7f0540891 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp
@@ -9,7 +9,7 @@ namespace tatum { namespace detail {
  *
  * The operations for CommonAnalysisVisitor to perform setup analysis.
  * The setup analysis operations define that maximum edge delays are used, and that the 
- * maixmum arrival time (and minimum required times) are propagated through the timing graph.
+ * maximum arrival time (and minimum required times) are propagated through the timing graph.
  *
  * \see HoldAnalysisOps
  * \see SetupAnalysisOps
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp
index 6b901b21def..82bbd8da30a 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp
@@ -152,10 +152,10 @@ bool CommonAnalysisVisitor<AnalysisOps>::do_arrival_pre_traverse_node(const Timi
     bool node_constrained = false;
 
     if(tc.node_is_constant_generator(node_id)) {
-        //We progpagate the tags from constant generators to ensure any sinks driven 
+        //We propagate the tags from constant generators to ensure any sinks driven
         //only by constant generators are recorded as constrained.
         //
-        //We use a special tag to initialize constant generators which gets overritten
+        //We use a special tag to initialize constant generators which gets overwritten
         //by any non-constant tag at downstream nodes
 
         TimingTag const_gen_tag = ops_.const_gen_tag();
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp
index 313efa244d7..253b31af5ba 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp
@@ -8,7 +8,7 @@ namespace tatum { namespace detail {
  *
  * The operations for CommonAnalysisVisitor to perform setup analysis.
  * The setup analysis operations define that maximum edge delays are used, and that the 
- * maixmum arrival time (and minimum required times) are propagated through the timing graph.
+ * maximum arrival time (and minimum required times) are propagated through the timing graph.
  *
  * \see HoldAnalysisOps
  * \see CommonAnalysisVisitor
@@ -121,7 +121,7 @@ class SetupAnalysisOps : public CommonAnalysisOps {
         Time calculate_slack(const Time required_time, const Time arrival_time) {
             //Setup requires the arrival to occur *before* the required time, so
             //slack is the amount of required time left after the arrival time; meaning
-            //we we subtract the arrival time from the required time to get the setup slack
+            //we subtract the arrival time from the required time to get the setup slack
             return required_time - arrival_time;
         }
 
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp
index 0cbf1a5863b..0104d10d3e3 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp
@@ -11,7 +11,7 @@
 namespace tatum {
 
 /**
- * A parallel timing analyzer which traveres the timing graph in a levelized
+ * A parallel timing analyzer which traverses the timing graph in a levelized
  * manner.  However nodes within each level are processed in parallel using
  * Thread Building Blocks (TBB). If TBB is not available it operates serially and is 
  * equivalent to the SerialWalker.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9af234a13f8..4e463ae0e0e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -145,11 +145,10 @@ void try_place(const Netlist<>& net_list,
      * if is_flat is false, even if is_flat is set to true from the command line.
      */
     VTR_ASSERT(!is_flat);
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& atom_ctx = g_vpr_ctx.atom();
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    auto& timing_ctx = g_vpr_ctx.timing();
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& atom_ctx = g_vpr_ctx.atom();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
 
     t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc);
diff --git a/vpr/src/timing/concrete_timing_info.h b/vpr/src/timing/concrete_timing_info.h
index 9aaae0d82ff..ce02e2abe90 100644
--- a/vpr/src/timing/concrete_timing_info.h
+++ b/vpr/src/timing/concrete_timing_info.h
@@ -454,7 +454,7 @@ class ConstantTimingInfo : public SetupHoldTimingInfo {
 /** Create a SetupTimingInfo for the given delay calculator */
 template<class DelayCalc>
 std::unique_ptr<SetupTimingInfo> make_setup_timing_info(std::shared_ptr<DelayCalc> delay_calculator, e_timing_update_type update_type) {
-    auto& timing_ctx = g_vpr_ctx.timing();
+    const auto& timing_ctx = g_vpr_ctx.timing();
 
     std::shared_ptr<tatum::SetupTimingAnalyzer> analyzer;
 

From bae8502eba7c3c7fe7a5e2beb5a704dd995a06c6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 16 Nov 2024 15:21:32 -0500
Subject: [PATCH 02/32] partially added Placer class

---
 vpr/src/base/clustered_netlist_utils.h |   1 +
 vpr/src/place/place.cpp                | 344 +------------------------
 vpr/src/place/place_delay_model.cpp    |   2 +-
 vpr/src/place/place_delay_model.h      |   2 +-
 vpr/src/place/placer.cpp               | 281 ++++++++++++++++++++
 vpr/src/place/placer.h                 |  74 ++++++
 vpr/src/place/timing_place_lookup.cpp  |  10 +-
 vpr/src/place/timing_place_lookup.h    |   2 +-
 vpr/src/util/vpr_utils.h               |   1 +
 9 files changed, 377 insertions(+), 340 deletions(-)
 create mode 100644 vpr/src/place/placer.cpp
 create mode 100644 vpr/src/place/placer.h

diff --git a/vpr/src/base/clustered_netlist_utils.h b/vpr/src/base/clustered_netlist_utils.h
index 52688f88e47..b5d1504ed91 100644
--- a/vpr/src/base/clustered_netlist_utils.h
+++ b/vpr/src/base/clustered_netlist_utils.h
@@ -14,6 +14,7 @@ class ClusteredPinAtomPinsLookup {
     typedef typename vtr::Range<atom_pin_iterator> atom_pin_range;
 
   public:
+    ClusteredPinAtomPinsLookup() = default;
     ClusteredPinAtomPinsLookup(const ClusteredNetlist& clustered_netlist, const AtomNetlist& atom_netlist, const IntraLbPbPinLookup& pb_gpin_lookup);
 
     atom_pin_range connected_atom_pins(ClusterPinId clustered_pin) const;
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4e463ae0e0e..f7e31bc5bb3 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -53,7 +53,7 @@
 
 #include "net_cost_handler.h"
 #include "placer_state.h"
-
+#include "placer.h"
 
 /********************* Static subroutines local to place.c *******************/
 #ifdef VERBOSE
@@ -69,30 +69,8 @@ void print_clb_placement(const char* fname);
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
                        const RRGraphView& rr_graph);
 
-static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts,
-                                                       const t_noc_opts& noc_opts,
-                                                       const std::vector<t_direct_inf>& directs,
-                                                       PlacerState& placer_state,
-                                                       std::optional<NocCostHandler>& noc_cost_handler);
-
 static void free_placement_structs();
 
-static void check_place(const t_placer_costs& costs,
-                        const PlaceDelayModel* delay_model,
-                        const PlacerCriticalities* criticalities,
-                        const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts,
-                        PlacerState& placer_state,
-                        NetCostHandler& net_cost_handler,
-                        const std::optional<NocCostHandler>& noc_cost_handler);
-
-static int check_placement_costs(const t_placer_costs& costs,
-                                 const PlaceDelayModel* delay_model,
-                                 const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm,
-                                 PlacerState& placer_state,
-                                 NetCostHandler& net_cost_handler);
-
 static int count_connections();
 
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
@@ -151,25 +129,15 @@ void try_place(const Netlist<>& net_list,
     const auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
 
-    t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc);
 
-    tatum::TimingPathInfo critical_path;
     float sTNS = NAN;
     float sWNS = NAN;
 
     char msg[vtr::bufsize];
 
-    t_placement_checkpoint placement_checkpoint;
-
-    std::shared_ptr<SetupTimingInfo> timing_info;
-    std::shared_ptr<PlacementDelayCalculator> placement_delay_calc;
-    std::unique_ptr<PlaceDelayModel> place_delay_model;
-    std::unique_ptr<PlacerSetupSlacks> placer_setup_slacks;
-    std::unique_ptr<PlacerCriticalities> placer_criticalities;
-    std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator;
-
-    t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size());
-
+    /* Placement delay model is independent of the placement and can be shared across
+     * multiple placers. So, it is created and initialized once. */
+    std::shared_ptr<PlaceDelayModel> place_delay_model;
     if (placer_opts.place_algorithm.is_timing_driven()) {
         /*do this before the initial placement to avoid messing up the initial placement */
         place_delay_model = alloc_lookups_and_delay_model(net_list,
@@ -195,213 +163,26 @@ void try_place(const Netlist<>& net_list,
 
     int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
 
-    PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb);
-    auto& blk_loc_registry = placer_state.mutable_blk_loc_registry();
-    const auto& p_timing_ctx = placer_state.timing();
-    const auto& p_runtime_ctx = placer_state.runtime();
-
-    vtr::RngContainer rng(placer_opts.seed);
+    auto& place_ctx = g_vpr_ctx.mutable_placement();
+    place_ctx.lock_loc_vars();
+    place_ctx.compressed_block_grids = create_compressed_block_grids();
 
-    std::optional<NocCostHandler> noc_cost_handler;
-    // create cost handler objects
-    NetCostHandler net_cost_handler = alloc_and_load_placement_structs(placer_opts, noc_opts, directs,
-                                                                       placer_state, noc_cost_handler);
+    Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb);
 
 #ifndef NO_GRAPHICS
-    if (noc_cost_handler.has_value()) {
-        get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler->get_link_bandwidth_usages());
+    if (placer.noc_cost_handler_.has_value()) {
+        get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler_->get_link_bandwidth_usages());
     }
 #endif
 
-    vtr::ScopedStartFinishTimer timer("Placement");
-
-    if (noc_opts.noc) {
-        normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
-    }
-
-    initial_placement(placer_opts, placer_opts.constraints_file.c_str(),
-                      noc_opts, blk_loc_registry, noc_cost_handler, rng);
-
-    //create the move generator based on the chosen strategy
-    auto [move_generator, move_generator2] = create_move_generators(placer_state, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng);
-
-    if (!placer_opts.write_initial_place_file.empty()) {
-        print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state.block_locs());
-    }
-
-#ifdef ENABLE_ANALYTIC_PLACE
-    /*
-     * Analytic Placer:
-     *  Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on
-     *  both the clb_netlist and the gird.
-     *  Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench
-     */
-    if (placer_opts.enable_analytic_placer) {
-        AnalyticPlacer{blk_loc_registry}.ap_place();
-    }
-
-#endif /* ENABLE_ANALYTIC_PLACE */
-
-    // Update physical pin values
-    for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
-        blk_loc_registry.place_sync_external_block_connections(block_id);
-    }
-
     const int width_fac = placer_opts.place_chan_width;
-    init_draw_coords((float)width_fac, blk_loc_registry);
-
-    /* Allocated here because it goes into timing critical code where each memory allocation is expensive */
-    IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types);
-    //Enables fast look-up of atom pins connect to CLB pins
-    ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup);
-
-    /* Gets initial cost and loads bounding boxes. */
-
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL);
-
-        int num_connections = count_connections();
-        VTR_LOG("\n");
-        VTR_LOG("There are %d point to point connections in this circuit.\n",
-                num_connections);
-        VTR_LOG("\n");
-
-        //Update the point-to-point delays from the initial placement
-        comp_td_connection_delays(place_delay_model.get(), placer_state);
-
-        /*
-         * Initialize timing analysis
-         */
-        // For placement, we don't use flat-routing
-        placement_delay_calc = std::make_shared<PlacementDelayCalculator>(atom_ctx.nlist,
-                                                                          atom_ctx.lookup,
-                                                                          p_timing_ctx.connection_delay,
-                                                                          is_flat);
-        placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin);
-        placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin);
-
-        timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type);
-
-        placer_setup_slacks = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, netlist_pin_lookup);
-
-        placer_criticalities = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist, netlist_pin_lookup);
-
-        pin_timing_invalidator = make_net_pin_timing_invalidator(
-            placer_opts.timing_update_type,
-            net_list,
-            netlist_pin_lookup,
-            atom_ctx.nlist,
-            atom_ctx.lookup,
-            *timing_info->timing_graph(),
-            is_flat);
-
-        //First time compute timing and costs, compute from scratch
-        PlaceCritParams crit_params;
-        crit_params.crit_exponent = placer_opts.td_place_exp_first;
-        crit_params.crit_limit = placer_opts.place_crit_limit;
-
-        initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(),
-                               placer_setup_slacks.get(), pin_timing_invalidator.get(),
-                               timing_info.get(), &costs, placer_state);
-
-        critical_path = timing_info->least_slack_critical_path();
-
-        /* Write out the initial timing echo file */
-        if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) {
-            tatum::write_echo(
-                getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH),
-                *timing_ctx.graph, *timing_ctx.constraints,
-                *placement_delay_calc, timing_info->analyzer());
-
-            tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
-
-            write_setup_timing_graph_dot(
-                getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)
-                    + std::string(".dot"),
-                *timing_info, debug_tnode);
-        }
-
-        /* Initialize the normalization factors. Calling costs.update_norm_factors() *
-         * here would fail the golden results of strong_sdc benchmark                */
-        costs.timing_cost_norm = 1 / costs.timing_cost;
-        costs.bb_cost_norm = 1 / costs.bb_cost;
-    } else {
-        VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
-
-        /* Total cost is the same as wirelength cost normalized*/
-        costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL);
-        costs.bb_cost_norm = 1 / costs.bb_cost;
-
-        /* Timing cost and normalization factors are not used */
-        constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
-        costs.timing_cost = INVALID_COST;
-        costs.timing_cost_norm = INVALID_COST;
-    }
-
-    if (noc_opts.noc) {
-        VTR_ASSERT(noc_cost_handler.has_value());
-
-        // get the costs associated with the NoC
-        costs.noc_cost_terms.aggregate_bandwidth = noc_cost_handler->comp_noc_aggregate_bandwidth_cost();
-        std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = noc_cost_handler->comp_noc_latency_cost();
-        costs.noc_cost_terms.congestion = noc_cost_handler->comp_noc_congestion_cost();
-
-        // initialize all the noc normalization factors
-        noc_cost_handler->update_noc_normalization_factors(costs);
-    }
-
-    // set the starting total placement cost
-    costs.cost = costs.get_total_cost(placer_opts, noc_opts);
-
-    //Sanity check that initial placement is legal
-    check_place(costs,
-                place_delay_model.get(),
-                placer_criticalities.get(),
-                placer_opts.place_algorithm,
-                noc_opts,
-                placer_state,
-                net_cost_handler,
-                noc_cost_handler);
-
-    //Initial placement statistics
-    VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
-            costs.bb_cost, costs.timing_cost);
-    if (noc_opts.noc) {
-        VTR_ASSERT(noc_cost_handler.has_value());
-
-        noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts);
-    }
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        VTR_LOG(
-            "Initial placement estimated Critical Path Delay (CPD): %g ns\n",
-            1e9 * critical_path.delay());
-        VTR_LOG(
-            "Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n",
-            1e9 * timing_info->setup_total_negative_slack());
-        VTR_LOG(
-            "Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n",
-            1e9 * timing_info->setup_worst_negative_slack());
-        VTR_LOG("\n");
-
-        VTR_LOG("Initial placement estimated setup slack histogram:\n");
-        print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer()));
-    }
-
-    size_t num_macro_members = 0;
-    for (auto& macro : blk_loc_registry.place_macros().macros()) {
-        num_macro_members += macro.members.size();
-    }
-    VTR_LOG(
-        "Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n",
-        blk_loc_registry.place_macros().macros().size(), num_macro_members,
-        float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
-    VTR_LOG("\n");
+    init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry());
 
     sprintf(msg,
             "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
             costs.cost, costs.bb_cost, costs.timing_cost, width_fac);
 
-    //Draw the initial placement
+    // Draw the initial placement
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
 
     if (placer_opts.placement_saves_per_temperature >= 1) {
@@ -525,8 +306,6 @@ void try_place(const Netlist<>& net_list,
         print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
     }
 
-    // TODO:
-    // 1. add some subroutine hierarchy!  Too big!
 
     //#ifdef VERBOSE
     //    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {
@@ -668,28 +447,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     return cube_bb;
 }
 
-/* Allocates the major structures needed only by the placer, primarily for *
- * computing costs quickly and such.                                       */
-static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts,
-                                                       const t_noc_opts& noc_opts,
-                                                       const std::vector<t_direct_inf>& directs,
-                                                       PlacerState& placer_state,
-                                                       std::optional<NocCostHandler>& noc_cost_handler) {
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
-
-    place_ctx.lock_loc_vars();
-
-    init_placement_context(placer_state.mutable_blk_loc_registry(), directs);
-
-    place_ctx.compressed_block_grids = create_compressed_block_grids();
-
-    if (noc_opts.noc) {
-        noc_cost_handler.emplace(placer_state.block_locs());
-    }
-
-    return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb};
-}
-
 /* Frees the major structures needed by the placer (and not needed       *
  * elsewhere).   */
 static void free_placement_structs() {
@@ -697,83 +454,6 @@ static void free_placement_structs() {
     vtr::release_memory(place_ctx.compressed_block_grids);
 }
 
-static void check_place(const t_placer_costs& costs,
-                        const PlaceDelayModel* delay_model,
-                        const PlacerCriticalities* criticalities,
-                        const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts,
-                        PlacerState& placer_state,
-                        NetCostHandler& net_cost_handler,
-                        const std::optional<NocCostHandler>& noc_cost_handler) {
-    /* Checks that the placement has not confused our data structures. *
-     * i.e. the clb and block structures agree about the locations of  *
-     * every block, blocks are in legal spots, etc.  Also recomputes   *
-     * the final placement cost from scratch and makes sure it is      *
-     * within roundoff of what we think the cost is.                   */
-    const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
-    const DeviceGrid& device_grid = g_vpr_ctx.device().grid;
-    const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints;
-
-    int error = 0;
-
-    // Verify the placement invariants independent to the placement flow.
-    error += verify_placement(placer_state.blk_loc_registry(),
-                              clb_nlist,
-                              device_grid,
-                              cluster_constraints);
-
-    error += check_placement_costs(costs, delay_model, criticalities, place_algorithm, placer_state, net_cost_handler);
-
-    if (noc_opts.noc) {
-        // check the NoC costs during placement if the user is using the NoC supported flow
-        error += noc_cost_handler->check_noc_placement_costs(costs, PL_INCREMENTAL_COST_TOLERANCE, noc_opts);
-        // make sure NoC routing configuration does not create any cycles in CDG
-        error += (int)noc_cost_handler->noc_routing_has_cycle();
-    }
-
-    if (error == 0) {
-        VTR_LOG("\n");
-        VTR_LOG("Completed placement consistency check successfully.\n");
-
-    } else {
-        VPR_ERROR(VPR_ERROR_PLACE,
-                  "\nCompleted placement consistency check, %d errors found.\n"
-                  "Aborting program.\n",
-                  error);
-    }
-}
-
-static int check_placement_costs(const t_placer_costs& costs,
-                                 const PlaceDelayModel* delay_model,
-                                 const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm,
-                                 PlacerState& placer_state,
-                                 NetCostHandler& net_cost_handler) {
-    int error = 0;
-    double timing_cost_check;
-
-    double bb_cost_check = net_cost_handler.comp_bb_cost(e_cost_methods::CHECK);
-
-    if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) {
-        VTR_LOG_ERROR(
-            "bb_cost_check: %g and bb_cost: %g differ in check_place.\n",
-            bb_cost_check, costs.bb_cost);
-        error++;
-    }
-
-    if (place_algorithm.is_timing_driven()) {
-        comp_td_costs(delay_model, *criticalities, placer_state, &timing_cost_check);
-        //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check);
-        if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) {
-            VTR_LOG_ERROR(
-                "timing_cost_check: %g and timing_cost: %g differ in check_place.\n",
-                timing_cost_check, costs.timing_cost);
-            error++;
-        }
-    }
-    return error;
-}
-
 #ifdef VERBOSE
 void print_clb_placement(const char* fname) {
     /* Prints out the clb placements to a file.  */
diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 4f626a5817f..36070bf8423 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -318,7 +318,7 @@ void OverrideDelayModel::write(const std::string& file) const {
 #endif
 
 ///@brief Initialize the placer delay model.
-std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+std::shared_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& placer_opts,
                                                                const t_router_opts& router_opts,
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index 0aa01385e6e..d1cd3c2164a 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -29,7 +29,7 @@ class PlaceDelayModel;
 class PlacerState;
 
 ///@brief Initialize the placer delay model.
-std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+std::shared_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& place_opts,
                                                                const t_router_opts& router_opts,
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
new file mode 100644
index 00000000000..d0b59b7a17a
--- /dev/null
+++ b/vpr/src/place/placer.cpp
@@ -0,0 +1,281 @@
+
+#include "placer.h"
+
+#include "vtr_time.h"
+#include "read_place.h"
+#include "analytic_placer.h"
+#include "initial_placement.h"
+#include "concrete_timing_info.h"
+#include "tatum/echo_writer.hpp"
+#include "verify_placement.h"
+#include "place_timing_update.h"
+
+Placer::Placer(const Netlist<>& net_list,
+               const t_placer_opts& placer_opts,
+               const t_analysis_opts& analysis_opts,
+               const t_noc_opts& noc_opts,
+               const std::vector<t_direct_inf>& directs,
+               std::shared_ptr<PlaceDelayModel> place_delay_model,
+               bool cube_bb)
+    : placer_opts_(placer_opts)
+    , noc_opts_(noc_opts)
+    , costs_(placer_opts.place_algorithm, noc_opts.noc)
+    , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb)
+    , rng_(placer_opts.seed)
+    , net_cost_handler_(placer_opts, placer_state_, cube_bb)
+    , place_delay_model_(place_delay_model){
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& atom_ctx = g_vpr_ctx.atom();
+
+    init_placement_context(placer_state_.mutable_blk_loc_registry(), directs);
+
+    // create a NoC cost handler if NoC optimization is enabled
+    if (noc_opts.noc) {
+        noc_cost_handler_.emplace(placer_state_.block_locs());
+    }
+
+    // Start measuring placement time
+    timer_ = std::make_unique<vtr::ScopedStartFinishTimer>("Placement");
+
+    /* To make sure the importance of NoC-related cost terms compared to
+     * BB and timing cost is determined only through NoC placement weighting factor,
+     * we normalize NoC-related cost weighting factors so that they add up to 1.
+     * With this normalization, NoC-related cost weighting factors only determine
+     * the relative importance of NoC cost terms with respect to each other, while
+     * the importance of total NoC cost to conventional placement cost is determined
+     * by NoC placement weighting factor.
+     */
+    if (noc_opts.noc) {
+        normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts)); // NOTE(review): casts away const on the caller's options; presumably modifies them in place — confirm
+    }
+
+    // Build the initial placement; the constraints file path, NoC options, NoC cost handler and RNG are passed through.
+    BlkLocRegistry& blk_loc_registry = placer_state_.mutable_blk_loc_registry();
+    initial_placement(placer_opts, placer_opts.constraints_file.c_str(),
+                      noc_opts, blk_loc_registry, noc_cost_handler_, rng_);
+
+    //create the move generator based on the chosen placement strategy
+//    auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_);
+
+    if (!placer_opts.write_initial_place_file.empty()) {
+        print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state_.block_locs());
+    }
+
+#ifdef ENABLE_ANALYTIC_PLACE
+    /*
+     * Analytic Placer:
+     *  Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on
+     *  both the clb_netlist and the grid.
+     *  Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench
+     */
+    if (placer_opts.enable_analytic_placer) {
+        AnalyticPlacer{blk_loc_registry}.ap_place();
+    }
+
+#endif /* ENABLE_ANALYTIC_PLACE */
+
+    // Update physical pin values
+   for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
+       blk_loc_registry.place_sync_external_block_connections(block_id);
+   }
+
+   // Allocate here because it goes into timing critical code where each memory allocation is expensive
+   pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types);
+   // Enables fast look-up of atom pins connect to CLB pins
+   netlist_pin_lookup_ = ClusteredPinAtomPinsLookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup_);
+
+   // Gets initial cost and loads bounding boxes.
+   costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL);
+   costs_.bb_cost_norm = 1 / costs_.bb_cost; // NOTE(review): assumes bb_cost is non-zero — confirm
+
+   if (placer_opts.place_algorithm.is_timing_driven()) {
+       alloc_and_init_timing_objects_(net_list, analysis_opts);
+   } else {
+       VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
+       // Timing cost and normalization factors are not used
+       constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
+       costs_.timing_cost = INVALID_COST;
+       costs_.timing_cost_norm = INVALID_COST;
+   }
+
+   if (noc_opts.noc) {
+       VTR_ASSERT(noc_cost_handler_.has_value());
+
+       // get the costs associated with the NoC
+       costs_.noc_cost_terms.aggregate_bandwidth = noc_cost_handler_->comp_noc_aggregate_bandwidth_cost();
+       std::tie(costs_.noc_cost_terms.latency, costs_.noc_cost_terms.latency_overrun) = noc_cost_handler_->comp_noc_latency_cost();
+       costs_.noc_cost_terms.congestion = noc_cost_handler_->comp_noc_congestion_cost();
+
+       // initialize all the noc normalization factors
+       noc_cost_handler_->update_noc_normalization_factors(costs_);
+   }
+
+   // set the starting total placement cost
+   costs_.cost = costs_.get_total_cost(placer_opts, noc_opts);
+}
+
+void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
+                                            const t_analysis_opts& analysis_opts) {
+   const auto& atom_ctx = g_vpr_ctx.atom();
+   const auto& cluster_ctx = g_vpr_ctx.clustering();
+   const auto& timing_ctx = g_vpr_ctx.timing();
+   const auto& p_timing_ctx = placer_state_.timing();
+   // Creates the timing-analysis objects held by this Placer and performs the initial timing/cost computation.
+   // Update the point-to-point delays from the initial placement
+   comp_td_connection_delays(place_delay_model_.get(), placer_state_);
+
+   // Initialize timing analysis
+   placement_delay_calc_ = std::make_shared<PlacementDelayCalculator>(atom_ctx.nlist,
+                                                                      atom_ctx.lookup,
+                                                                      p_timing_ctx.connection_delay,
+                                                                      /*is_flat=*/false);
+   placement_delay_calc_->set_tsu_margin_relative(placer_opts_.tsu_rel_margin);
+   placement_delay_calc_->set_tsu_margin_absolute(placer_opts_.tsu_abs_margin);
+
+   timing_info_ = make_setup_timing_info(placement_delay_calc_, placer_opts_.timing_update_type);
+
+   placer_setup_slacks_ = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, netlist_pin_lookup_);
+
+   placer_criticalities_ = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist, netlist_pin_lookup_);
+
+   pin_timing_invalidator_ = make_net_pin_timing_invalidator(placer_opts_.timing_update_type,
+                                                             net_list,
+                                                             netlist_pin_lookup_,
+                                                             atom_ctx.nlist,
+                                                             atom_ctx.lookup,
+                                                             *timing_info_->timing_graph(),
+                                                             /*is_flat=*/false);
+
+   // First time compute timing and costs, compute from scratch
+   PlaceCritParams crit_params;
+   crit_params.crit_exponent = placer_opts_.td_place_exp_first;
+   crit_params.crit_limit = placer_opts_.place_crit_limit;
+
+   initialize_timing_info(crit_params, place_delay_model_.get(), placer_criticalities_.get(),
+                          placer_setup_slacks_.get(), pin_timing_invalidator_.get(),
+                          timing_info_.get(), &costs_, placer_state_);
+
+   critical_path_ = timing_info_->least_slack_critical_path();
+
+   // Write out the initial timing echo file
+   if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) {
+       tatum::write_echo(
+           getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH),
+           *timing_ctx.graph, *timing_ctx.constraints,
+           *placement_delay_calc_, timing_info_->analyzer());
+
+       tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
+
+       write_setup_timing_graph_dot(
+           getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)
+               + std::string(".dot"),
+           *timing_info_, debug_tnode);
+   }
+
+   costs_.timing_cost_norm = 1 / costs_.timing_cost;
+
+   // Sanity check that initial placement is legal
+   check_place_();
+   // NOTE(review): the check above and the stats below run before the constructor sets NoC costs and costs_.cost, and only when timing-driven — confirm.
+   print_initial_placement_stats_();
+   // NOTE(review): the std::make_unique call below has no template argument and uses names not declared in this scope — looks unfinished.
+#ifndef ENABLE_ANALYTIC_PLACE
+   annealer_ = std::make_unique(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
+                                noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(),
+                                placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(),
+                                move_lim);
+#endif
+}
+
+void Placer::check_place_() {
+   const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+   const DeviceGrid& device_grid = g_vpr_ctx.device().grid;
+   const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints;
+   // Running total of failed checks; a non-zero total is reported through VPR_ERROR at the end.
+   int error = 0;
+
+   // Verify the placement invariants independent of the placement flow.
+   error += verify_placement(placer_state_.blk_loc_registry(),
+                             clb_nlist,
+                             device_grid,
+                             cluster_constraints);
+   // Compare the stored bounding-box/timing costs against freshly recomputed values.
+   error += check_placement_costs_();
+
+   if (noc_opts_.noc) {
+       // check the NoC costs during placement if the user is using the NoC supported flow
+       error += noc_cost_handler_->check_noc_placement_costs(costs_, PL_INCREMENTAL_COST_TOLERANCE, noc_opts_);
+       // make sure NoC routing configuration does not create any cycles in CDG
+       error += (int)noc_cost_handler_->noc_routing_has_cycle();
+   }
+
+   if (error == 0) {
+       VTR_LOG("\n");
+       VTR_LOG("Completed placement consistency check successfully.\n");
+
+   } else {
+       VPR_ERROR(VPR_ERROR_PLACE,
+                 "\nCompleted placement consistency check, %d errors found.\n"
+                 "Aborting program.\n",
+                 error);
+   }
+}
+
+int Placer::check_placement_costs_() {
+   int error = 0;
+   double timing_cost_check;
+   // Recompute the bounding-box cost with e_cost_methods::CHECK and compare it with the stored costs_.bb_cost.
+   double bb_cost_check = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK);
+
+   if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) {
+       VTR_LOG_ERROR(
+           "bb_cost_check: %g and bb_cost: %g differ in check_place.\n",
+           bb_cost_check, costs_.bb_cost);
+       error++;
+   }
+   // For timing-driven placement, recompute the timing cost as well and apply the same relative tolerance.
+   if (placer_opts_.place_algorithm.is_timing_driven()) {
+       comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check);
+       //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check);
+       if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) {
+           VTR_LOG_ERROR(
+               "timing_cost_check: %g and timing_cost: %g differ in check_place.\n",
+               timing_cost_check, costs_.timing_cost);
+           error++;
+       }
+   }
+   return error; // number of cost mismatches found (0, 1 or 2)
+}
+
+void Placer::print_initial_placement_stats_() {
+   VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n",
+           costs_.cost, costs_.bb_cost, costs_.timing_cost);
+   // NoC costs are logged only when NoC optimization is enabled.
+   if (noc_opts_.noc) {
+       VTR_ASSERT(noc_cost_handler_.has_value());
+       noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs_, noc_opts_);
+   }
+   // Timing figures are scaled by 1e9 and labelled "ns" in the messages below.
+   if (placer_opts_.place_algorithm.is_timing_driven()) {
+       VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n",
+               1e9 * critical_path_.delay());
+       VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n",
+               1e9 * timing_info_->setup_total_negative_slack());
+       VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n",
+               1e9 * timing_info_->setup_worst_negative_slack());
+       VTR_LOG("\n");
+       VTR_LOG("Initial placement estimated setup slack histogram:\n");
+       print_histogram(create_setup_slack_histogram(*timing_info_->setup_analyzer()));
+   }
+   // Macro summary. NOTE(review): the average divides by macros().size(); with no macros this is 0/0 — confirm acceptable.
+   const BlkLocRegistry& blk_loc_registry = placer_state_.blk_loc_registry();
+   size_t num_macro_members = 0;
+   for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) {
+       num_macro_members += macro.members.size();
+   }
+   VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n",
+           blk_loc_registry.place_macros().macros().size(), num_macro_members,
+           float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
+   VTR_LOG("\n");
+}
\ No newline at end of file
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
new file mode 100644
index 00000000000..57ebce2a8a2
--- /dev/null
+++ b/vpr/src/place/placer.h
@@ -0,0 +1,74 @@
+
+
+#ifndef VTR_PLACER_H
+#define VTR_PLACER_H
+
+#include <memory>
+#include <optional>
+
+#include "timing_place.h"
+#include "place_checkpoint.h"
+#include "PlacementDelayCalculator.h"
+#include "placer_state.h"
+#include "noc_place_utils.h"
+#include "net_cost_handler.h"
+
+
+class Placer {
+  public:
+    Placer(const Netlist<>& net_list,
+           const t_placer_opts& placer_opts,
+           const t_analysis_opts& analysis_opts,
+           const t_noc_opts& noc_opts,
+           const std::vector<t_direct_inf>& directs,
+           std::shared_ptr<PlaceDelayModel> place_delay_model,
+           bool cube_bb);
+
+    // NOTE(review): placer_opts_ and noc_opts_ are stored as references; the caller's option objects must outlive this Placer.
+    //TODO: make this private
+  public:
+    const t_placer_opts& placer_opts_;
+    const t_noc_opts& noc_opts_;
+    t_placer_costs costs_;
+    PlacerState placer_state_;
+    vtr::RngContainer rng_;
+    NetCostHandler net_cost_handler_;
+    std::optional<NocCostHandler> noc_cost_handler_;
+    std::shared_ptr<PlaceDelayModel> place_delay_model_;
+
+    t_placement_checkpoint placement_checkpoint_;
+    // Timing-analysis objects; assigned in alloc_and_init_timing_objects_().
+    std::shared_ptr<SetupTimingInfo> timing_info_;
+    std::shared_ptr<PlacementDelayCalculator> placement_delay_calc_;
+    std::unique_ptr<PlacerSetupSlacks> placer_setup_slacks_;
+    std::unique_ptr<PlacerCriticalities> placer_criticalities_;
+    std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator_;
+    tatum::TimingPathInfo critical_path_;
+
+    // Scoped start/finish timer for the placement stage.
+    std::unique_ptr<vtr::ScopedStartFinishTimer> timer_;
+    // Pin look-up helpers (pb graph pins; atom pins connected to CLB pins).
+    IntraLbPbPinLookup pb_gpin_lookup_;
+    ClusteredPinAtomPinsLookup netlist_pin_lookup_;
+
+    std::unique_ptr<PlacementAnnealer> annealer_;
+
+  private:
+    void alloc_and_init_timing_objects_(const Netlist<>& net_list,
+                                        const t_analysis_opts& analysis_opts);
+
+    /**
+     * Checks that the placement has not confused our data structures.
+     * i.e. the clb and block structures agree about the locations of
+     * every block, blocks are in legal spots, etc.  Also recomputes
+     * the final placement cost from scratch and makes sure it is
+     * within round-off of what we think the cost is.
+     */
+    void check_place_();
+    ///@brief Recomputes the bounding-box cost (and the timing cost when timing-driven) and returns the number of mismatches.
+    int check_placement_costs_();
+    ///@brief Logs the initial placement costs, timing figures (when timing-driven) and placement macro statistics.
+    void print_initial_placement_stats_();
+};
+
+#endif //VTR_PLACER_H
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index 86dc396e2b8..873633a9c5e 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -170,7 +170,7 @@ static float find_neighboring_average(vtr::NdMatrix<float, 4>& matrix,
 
 /******* Globally Accessible Functions **********/
 
-std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
+std::shared_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
                                                            t_det_routing_arch* det_routing_arch,
@@ -196,15 +196,15 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&
     int longest_length = get_longest_segment_length(segment_inf);
 
     /*now setup and compute the actual arrays */
-    std::unique_ptr<PlaceDelayModel> place_delay_model;
+    std::shared_ptr<PlaceDelayModel> place_delay_model;
     float min_cross_layer_delay = get_min_cross_layer_delay();
 
     if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) {
-        place_delay_model = std::make_unique<SimpleDelayModel>();
+        place_delay_model = std::make_shared<SimpleDelayModel>();
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
-        place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
+        place_delay_model = std::make_shared<DeltaDelayModel>(min_cross_layer_delay, is_flat);
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
-        place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
+        place_delay_model = std::make_shared<OverrideDelayModel>(min_cross_layer_delay, is_flat);
     } else {
         VTR_ASSERT_MSG(false, "Invalid placer delay model");
     }
diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h
index fba3f470483..14897a7fcc4 100644
--- a/vpr/src/place/timing_place_lookup.h
+++ b/vpr/src/place/timing_place_lookup.h
@@ -2,7 +2,7 @@
 #define TIMING_PLACE_LOOKUP_H
 #include "place_delay_model.h"
 
-std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
+std::shared_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
                                                            t_det_routing_arch* det_routing_arch,
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index 6a4d97aefa1..8869cc55ddd 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -136,6 +136,7 @@ std::string rr_node_arch_name(RRNodeId inode, bool is_flat);
 //Class for looking up pb graph pins from block pin indices
 class IntraLbPbPinLookup {
   public:
+    IntraLbPbPinLookup() = default;
     IntraLbPbPinLookup(const std::vector<t_logical_block_type>& block_types);
     IntraLbPbPinLookup(const IntraLbPbPinLookup& rhs);
     IntraLbPbPinLookup& operator=(IntraLbPbPinLookup rhs);

From 2298704f9d6731338177ec07d4fa46dcf510c689 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 10:55:21 -0500
Subject: [PATCH 03/32] add place_log_util.h/.cpp

---
 .../librrgraph/src/base/rr_spatial_lookup.cpp |   2 +-
 vpr/src/base/read_options.cpp                 |   2 +-
 vpr/src/place/place_log_util.cpp              | 119 ++++++++++++++++++
 vpr/src/place/place_log_util.h                |  29 +++++
 vpr/src/route/router_lookahead_map.cpp        |  14 +--
 5 files changed, 157 insertions(+), 9 deletions(-)
 create mode 100644 vpr/src/place/place_log_util.cpp
 create mode 100644 vpr/src/place/place_log_util.h

diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp
index 6959659be8b..3b23d7d49e9 100644
--- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp
+++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp
@@ -163,7 +163,7 @@ std::vector<RRNodeId> RRSpatialLookup::find_nodes(int layer,
     nodes.reserve(num_nodes);
     for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) {
         if (RRNodeId(node)) {
-            nodes.push_back(RRNodeId(node));
+            nodes.emplace_back(node);
         }
     }
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 1641e255b89..f789f848808 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2256,7 +2256,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument<e_reducer, ParseReducer>(args.place_delay_model_reducer, "--place_delay_model_reducer")
-        .help("When calculating delta delays for the placment delay model how are multiple values combined?")
+        .help("When calculating delta delays for the placement delay model how are multiple values combined?")
         .default_value("min")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
new file mode 100644
index 00000000000..d935f474d0a
--- /dev/null
+++ b/vpr/src/place/place_log_util.cpp
@@ -0,0 +1,119 @@
+#include "place_log_util.h"
+
+#include "vtr_log.h"
+#include "annealer.h"
+#include "place_util.h"
+
+void print_place_status_header(bool noc_enabled) {
+    VTR_LOG("\n");
+    if (!noc_enabled) { // conventional columns only
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+        VTR_LOG(
+            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha\n");
+        VTR_LOG(
+            "      (sec)                                          (ns)       (ns)     (ns)                                                 \n");
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+    } else { // same columns followed by the four NoC cost columns; the opening rule matches the closing rule
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n");
+        VTR_LOG(
+            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha Agg. BW  Agg. Lat Lat Over. NoC Cong.\n");
+        VTR_LOG(
+            "      (sec)                                          (ns)       (ns)     (ns)                                                   (bps)     (ns)     (ns)             \n");
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n");
+    }
+}
+
+void print_place_status(const t_annealing_state& state,
+                        const t_placer_statistics& stats,
+                        float elapsed_sec,
+                        float cpd,
+                        float sTNS,
+                        float sWNS,
+                        size_t tot_moves,
+                        bool noc_enabled,
+                        const NocCostTerms& noc_cost_terms) {
+    VTR_LOG(
+        "%4zu %6.1f %7.1e "
+        "%7.3f %10.2f %-10.5g "
+        "%7.3f % 10.3g % 8.3f "
+        "%7.3f %7.4f %6.1f %8.2f",
+        state.num_temps, elapsed_sec, state.t,
+        stats.av_cost, stats.av_bb_cost, stats.av_timing_cost,
+        1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS,
+        stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent);
+    // The total-moves column is written by pretty_print_uint rather than a printf conversion.
+    pretty_print_uint(" ", tot_moves, 9, 3);
+
+    VTR_LOG(" %6.3f", state.alpha);
+    // The four NoC cost columns are appended only when noc_enabled is set.
+    if (noc_enabled) {
+        VTR_LOG(
+            " %7.2e %7.2e"
+            " %8.2e %8.2f",
+            noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency,
+            noc_cost_terms.latency_overrun, noc_cost_terms.congestion);
+    }
+    // Terminate the row and flush stdout.
+    VTR_LOG("\n");
+    fflush(stdout);
+}
+
+void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& block_locs = blk_loc_registry.block_locs();
+    // Longest logical-block and physical-tile names seen; used as printf field widths below.
+    size_t max_block_name = 0;
+    size_t max_tile_name = 0;
+
+    //Record the resource requirement
+    std::map<t_logical_block_type_ptr, size_t> num_type_instances;
+    std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> num_placed_instances;
+
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
+        const t_pl_loc& loc = block_locs[blk_id].loc;
+
+        t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer});
+        t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id);
+
+        num_type_instances[logical_block]++;
+        num_placed_instances[logical_block][physical_tile]++;
+
+        max_block_name = std::max(max_block_name, logical_block->name.length());
+        max_tile_name = std::max(max_tile_name, physical_tile->name.length());
+    }
+    // A '*' field width must be passed as int and a size_t count needs %zu, hence the casts and conversion below.
+    VTR_LOG("\n");
+    VTR_LOG("Placement resource usage:\n");
+    for (const auto [logical_block_type_ptr, _] : num_type_instances) {
+        for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) {
+            VTR_LOG("  %-*s implemented as %-*s: %zu\n", static_cast<int>(max_block_name),
+                    logical_block_type_ptr->name.c_str(), static_cast<int>(max_tile_name),
+                    physical_tile_type_ptr->name.c_str(), num_instances);
+        }
+    }
+    VTR_LOG("\n");
+}
+
+void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) {
+    size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted;
+    VTR_ASSERT(total_swap_attempts > 0);
+    // A '*' field width must be an int and size_t values need %zu; the counters are cast so the conversions always match.
+    const int num_swap_print_digits = static_cast<int>(ceil(log10(total_swap_attempts)));
+    float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts;
+    float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts;
+    float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts;
+    VTR_LOG("Placement number of temperatures: %zu\n", static_cast<size_t>(state.num_temps));
+    VTR_LOG("Placement total # of swap attempts: %*zu\n", num_swap_print_digits,
+            total_swap_attempts);
+    VTR_LOG("\tSwaps accepted: %*zu (%4.1f %%)\n", num_swap_print_digits,
+            static_cast<size_t>(swap_stats.num_swap_accepted), 100 * accept_rate);
+    VTR_LOG("\tSwaps rejected: %*zu (%4.1f %%)\n", num_swap_print_digits,
+            static_cast<size_t>(swap_stats.num_swap_rejected), 100 * reject_rate);
+    VTR_LOG("\tSwaps aborted: %*zu (%4.1f %%)\n", num_swap_print_digits,
+            static_cast<size_t>(swap_stats.num_swap_aborted), 100 * abort_rate);
+}
\ No newline at end of file
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
new file mode 100644
index 00000000000..22a2bbd9b03
--- /dev/null
+++ b/vpr/src/place/place_log_util.h
@@ -0,0 +1,29 @@
+
+#ifndef VTR_PLACE_LOG_UTIL_H
+#define VTR_PLACE_LOG_UTIL_H
+
+#include <cstddef>
+
+class t_annealing_state;
+class t_placer_statistics;
+struct NocCostTerms;
+struct t_swap_stats;
+class BlkLocRegistry;
+
+void print_place_status_header(bool noc_enabled); // logs the status table header; extra NoC columns when noc_enabled
+///@brief Logs one row of the status table; the NoC cost columns are appended only when noc_enabled is true.
+void print_place_status(const t_annealing_state& state,
+                        const t_placer_statistics& stats,
+                        float elapsed_sec,
+                        float cpd,
+                        float sTNS,
+                        float sWNS,
+                        size_t tot_moves,
+                        bool noc_enabled,
+                        const NocCostTerms& noc_cost_terms);
+///@brief Logs, for each logical block type, how many instances were placed on each physical tile type.
+void print_resources_utilization(const BlkLocRegistry& blk_loc_registry);
+///@brief Logs the number of temperatures and the accepted/rejected/aborted swap counts with their percentages.
+void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats);
+
+#endif //VTR_PLACE_LOG_UTIL_H
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 88cd75d46c8..a3d468b901c 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -507,12 +507,12 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
     auto& grid = device_ctx.grid;
 
     //Re-allocate
-    f_wire_cost_map = t_wire_cost_map({static_cast<unsigned long>(grid.get_num_layers()), 
-                                        static_cast<unsigned long>(grid.get_num_layers()), 
-                                        2,
-                                        segment_inf_vec.size(),
-                                        device_ctx.grid.width(),
-                                        device_ctx.grid.height()});
+    f_wire_cost_map = t_wire_cost_map({static_cast<unsigned long>(grid.get_num_layers()),
+                                       static_cast<unsigned long>(grid.get_num_layers()),
+                                       2,
+                                       segment_inf_vec.size(),
+                                       device_ctx.grid.width(),
+                                       device_ctx.grid.height()});
 
     int longest_seg_length = 0;
     for (const auto& seg_inf : segment_inf_vec) {
@@ -536,7 +536,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
                                                                                        chan_type,
                                                                                        segment_inf,
                                                                                        std::unordered_map<int, std::unordered_set<int>>(),
-                                                                                       true);
+                                                                                       /*sample_all_locs=*/true);
                 if (routing_cost_map.empty()) {
                     continue;
                 }

From afc789b2302592ddb485c9ea389c93c7c937ca19 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 12:03:27 -0500
Subject: [PATCH 04/32] added place() method to Placer class

---
 vpr/src/place/place.cpp            | 307 +----------------------------
 vpr/src/place/place_checkpoint.cpp |   2 +-
 vpr/src/place/place_checkpoint.h   |   2 +-
 vpr/src/place/placer.cpp           | 158 ++++++++++++++-
 vpr/src/place/placer.h             |   1 +
 5 files changed, 168 insertions(+), 302 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f7e31bc5bb3..8344bfd5ff9 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -80,22 +80,6 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                bool is_flat,
                                                const BlkLocRegistry& blk_loc_registry);
 
-static void print_place_status_header(bool noc_enabled);
-
-static void print_place_status(const t_annealing_state& state,
-                               const t_placer_statistics& stats,
-                               float elapsed_sec,
-                               float cpd,
-                               float sTNS,
-                               float sWNS,
-                               size_t tot_moves,
-                               bool noc_enabled,
-                               const NocCostTerms& noc_cost_terms);
-
-static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry);
-
-static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats);
-
 /**
  * @brief Copies the placement location variables into the global placement context.
  * @param blk_loc_registry The placement location variables to be copied.
@@ -129,10 +113,6 @@ void try_place(const Netlist<>& net_list,
     const auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
 
-
-    float sTNS = NAN;
-    float sWNS = NAN;
-
     char msg[vtr::bufsize];
 
     /* Placement delay model is independent of the placement and can be shared across
@@ -192,141 +172,6 @@ void try_place(const Netlist<>& net_list,
         print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
     }
 
-    bool skip_anneal = false;
-
-#ifdef ENABLE_ANALYTIC_PLACE
-    // Analytic placer: When enabled, skip most of the annealing and go straight to quench
-    // TODO: refactor goto label.
-    if (placer_opts.enable_analytic_placer) {
-        skip_anneal = true;
-    }
-#endif /* ENABLE_ANALYTIC_PLACE */
-
-    PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
-                               noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(),
-                               placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim);
-
-    const t_annealing_state& annealing_state = annealer.get_annealing_state();
-    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
-
-    if (!skip_anneal) {
-        //Table header
-        VTR_LOG("\n");
-        print_place_status_header(noc_opts.noc);
-
-        /* Outer loop of the simulated annealing begins */
-        do {
-            vtr::Timer temperature_timer;
-
-            annealer.outer_loop_update_timing_info();
-
-            if (placer_opts.place_algorithm.is_timing_driven()) {
-                critical_path = timing_info->least_slack_critical_path();
-                sTNS = timing_info->setup_total_negative_slack();
-                sWNS = timing_info->setup_worst_negative_slack();
-
-                // see if we should save the current placement solution as a checkpoint
-                if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
-                    save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(),
-                                                        placement_checkpoint,
-                                                        timing_info, costs, critical_path.delay());
-                }
-            }
-
-            // do a complete inner loop iteration
-            annealer.placement_inner_loop();
-
-            print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                               critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
-                               noc_opts.noc, costs.noc_cost_terms);
-
-            sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
-                    costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t);
-            update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info);
-
-            //#ifdef VERBOSE
-            //            if (getEchoEnabled()) {
-            //                print_clb_placement("first_iteration_clb_placement.echo");
-            //            }
-            //#endif
-        } while (annealer.outer_loop_update_state());
-        /* Outer loop of the simulated annealing ends */
-    } //skip_anneal ends
-
-    // Start Quench
-    annealer.start_quench();
-
-    auto pre_quench_timing_stats = timing_ctx.stats;
-    { /* Quench */
-
-        vtr::ScopedFinishTimer temperature_timer("Placement Quench");
-
-        annealer.outer_loop_update_timing_info();
-
-        /* Run inner loop again with temperature = 0 so as to accept only swaps
-         * which reduce the cost of the placement */
-        annealer.placement_inner_loop();
-
-        if (placer_opts.place_quench_algorithm.is_timing_driven()) {
-            critical_path = timing_info->least_slack_critical_path();
-            sTNS = timing_info->setup_total_negative_slack();
-            sWNS = timing_info->setup_worst_negative_slack();
-        }
-
-        print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                           critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
-                           noc_opts.noc, costs.noc_cost_terms);
-    }
-    auto post_quench_timing_stats = timing_ctx.stats;
-
-    //Final timing analysis
-    PlaceCritParams crit_params;
-    crit_params.crit_exponent = annealing_state.crit_exponent;
-    crit_params.crit_limit = placer_opts.place_crit_limit;
-
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        perform_full_timing_update(crit_params, place_delay_model.get(), placer_criticalities.get(),
-                                   placer_setup_slacks.get(), pin_timing_invalidator.get(),
-                                   timing_info.get(), &costs, placer_state);
-        VTR_LOG("post-quench CPD = %g (ns) \n",
-                1e9 * timing_info->least_slack_critical_path().delay());
-    }
-
-    //See if our latest checkpoint is better than the current placement solution
-    if (placer_opts.place_checkpointing)
-        restore_best_placement(placer_state,
-                               placement_checkpoint, timing_info, costs,
-                               placer_criticalities, placer_setup_slacks, place_delay_model,
-                               pin_timing_invalidator, crit_params, noc_cost_handler);
-
-    if (placer_opts.placement_saves_per_temperature >= 1) {
-        std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
-                                               annealing_state.num_temps + 1, 0);
-        VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
-    }
-
-
-    //#ifdef VERBOSE
-    //    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {
-    //        print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));
-    //    }
-    //#endif
-
-    // Update physical pin values
-    for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
-        blk_loc_registry.place_sync_external_block_connections(block_id);
-    }
-
-    check_place(costs,
-                place_delay_model.get(),
-                placer_criticalities.get(),
-                placer_opts.place_algorithm,
-                noc_opts,
-                placer_state,
-                net_cost_handler,
-                noc_cost_handler);
-
     //Some stats
     VTR_LOG("\n");
     VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
@@ -339,23 +184,19 @@ void try_place(const Netlist<>& net_list,
         critical_path = timing_info->least_slack_critical_path();
 
         if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) {
-            tatum::write_echo(
-                getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
-                *timing_ctx.graph, *timing_ctx.constraints,
-                *placement_delay_calc, timing_info->analyzer());
-
-            tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(
-                analysis_opts.echo_dot_timing_graph_node);
-            write_setup_timing_graph_dot(
-                getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)
-                    + std::string(".dot"),
-                *timing_info, debug_tnode);
+            tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
+                              *timing_ctx.graph, *timing_ctx.constraints,
+                              *placement_delay_calc, timing_info->analyzer());
+
+            tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
+            write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
+                                         *timing_info, debug_tnode);
         }
 
         generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info,
                                            *placement_delay_calc, is_flat, blk_loc_registry);
 
-        /* Print critical path delay metrics */
+        // Print critical path delay metrics
         VTR_LOG("\n");
         print_setup_timing_summary(*timing_ctx.constraints,
                                    *timing_info->setup_analyzer(), "Placement estimated ", "");
@@ -380,28 +221,9 @@ void try_place(const Netlist<>& net_list,
     }
 
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
-    // Print out swap statistics
-    print_resources_utilization(blk_loc_registry);
-
-    print_placement_swaps_stats(annealing_state, swap_stats);
-
-    move_type_stats.print_placement_move_types_stats();
-
-    if (noc_opts.noc) {
-        write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs());
-    }
 
     free_placement_structs();
 
-    print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats);
-    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
-
-    VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
-            p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_nets_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
-
     copy_locs_to_global_state(blk_loc_registry);
 }
 
@@ -505,119 +327,6 @@ static void update_screen_debug() {
 }
 #endif
 
-static void print_place_status_header(bool noc_enabled) {
-    if (!noc_enabled) {
-        VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
-        VTR_LOG(
-            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha\n");
-        VTR_LOG(
-            "      (sec)                                          (ns)       (ns)     (ns)                                                 \n");
-        VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
-    } else {
-        VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- ---------  ---------\n");
-        VTR_LOG(
-            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha Agg. BW  Agg. Lat Lat Over. NoC Cong.\n");
-        VTR_LOG(
-            "      (sec)                                          (ns)       (ns)     (ns)                                                   (bps)     (ns)     (ns)             \n");
-        VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n");
-    }
-}
-
-static void print_place_status(const t_annealing_state& state,
-                               const t_placer_statistics& stats,
-                               float elapsed_sec,
-                               float cpd,
-                               float sTNS,
-                               float sWNS,
-                               size_t tot_moves,
-                               bool noc_enabled,
-                               const NocCostTerms& noc_cost_terms) {
-    VTR_LOG(
-        "%4zu %6.1f %7.1e "
-        "%7.3f %10.2f %-10.5g "
-        "%7.3f % 10.3g % 8.3f "
-        "%7.3f %7.4f %6.1f %8.2f",
-        state.num_temps, elapsed_sec, state.t,
-        stats.av_cost, stats.av_bb_cost, stats.av_timing_cost,
-        1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS,
-        stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent);
-
-    pretty_print_uint(" ", tot_moves, 9, 3);
-
-    VTR_LOG(" %6.3f", state.alpha);
-
-    if (noc_enabled) {
-        VTR_LOG(
-            " %7.2e %7.2e"
-            " %8.2e %8.2f",
-            noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency,
-            noc_cost_terms.latency_overrun, noc_cost_terms.congestion);
-    }
-
-    VTR_LOG("\n");
-    fflush(stdout);
-}
-
-static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) {
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& block_locs = blk_loc_registry.block_locs();
-
-    size_t max_block_name = 0;
-    size_t max_tile_name = 0;
-
-    //Record the resource requirement
-    std::map<t_logical_block_type_ptr, size_t> num_type_instances;
-    std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> num_placed_instances;
-
-    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
-        const t_pl_loc& loc = block_locs[blk_id].loc;
-
-        t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer});
-        t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id);
-
-        num_type_instances[logical_block]++;
-        num_placed_instances[logical_block][physical_tile]++;
-
-        max_block_name = std::max(max_block_name, logical_block->name.length());
-        max_tile_name = std::max(max_tile_name, physical_tile->name.length());
-    }
-
-    VTR_LOG("\n");
-    VTR_LOG("Placement resource usage:\n");
-    for (const auto [logical_block_type_ptr, _] : num_type_instances) {
-        for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) {
-            VTR_LOG("  %-*s implemented as %-*s: %d\n", max_block_name,
-                    logical_block_type_ptr->name.c_str(), max_tile_name,
-                    physical_tile_type_ptr->name.c_str(), num_instances);
-        }
-    }
-    VTR_LOG("\n");
-}
-
-static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) {
-    size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted;
-    VTR_ASSERT(total_swap_attempts > 0);
-
-    size_t num_swap_print_digits = ceil(log10(total_swap_attempts));
-    float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts;
-    float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts;
-    float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts;
-    VTR_LOG("Placement number of temperatures: %d\n", state.num_temps);
-    VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits,
-            total_swap_attempts);
-    VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits,
-            swap_stats.num_swap_accepted, 100 * accept_rate);
-    VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits,
-            swap_stats.num_swap_rejected, 100 * reject_rate);
-    VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
-            swap_stats.num_swap_aborted, 100 * abort_rate);
-}
-
 static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
 
diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp
index 85f4ab28e18..60b009d85ae 100644
--- a/vpr/src/place/place_checkpoint.cpp
+++ b/vpr/src/place/place_checkpoint.cpp
@@ -42,7 +42,7 @@ void restore_best_placement(PlacerState& placer_state,
                             t_placer_costs& costs,
                             std::unique_ptr<PlacerCriticalities>& placer_criticalities,
                             std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks,
-                            std::unique_ptr<PlaceDelayModel>& place_delay_model,
+                            std::shared_ptr<PlaceDelayModel>& place_delay_model,
                             std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator,
                             PlaceCritParams crit_params,
                             std::optional<NocCostHandler>& noc_cost_handler) {
diff --git a/vpr/src/place/place_checkpoint.h b/vpr/src/place/place_checkpoint.h
index 8c2313e7117..9a3fe76d5d8 100644
--- a/vpr/src/place/place_checkpoint.h
+++ b/vpr/src/place/place_checkpoint.h
@@ -74,7 +74,7 @@ void restore_best_placement(PlacerState& placer_state,
                             t_placer_costs& costs,
                             std::unique_ptr<PlacerCriticalities>& placer_criticalities,
                             std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks,
-                            std::unique_ptr<PlaceDelayModel>& place_delay_model,
+                            std::shared_ptr<PlaceDelayModel>& place_delay_model,
                             std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator,
                             PlaceCritParams crit_params,
                             std::optional<NocCostHandler>& noc_cost_handler);
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index d0b59b7a17a..03d861055c2 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -9,6 +9,10 @@
 #include "tatum/echo_writer.hpp"
 #include "verify_placement.h"
 #include "place_timing_update.h"
+#include "annealer.h"
+#include "RL_agent_util.h"
+#include "place_log_util.h"
+#include "place_checkpoint.h"
 
 Placer::Placer(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
@@ -278,4 +282,156 @@ void Placer::print_initial_placement_stats_() {
            blk_loc_registry.place_macros().macros().size(), num_macro_members,
            float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
    VTR_LOG("\n");
-}
\ No newline at end of file
+}
+
+void Placer::place() {
+   const auto& timing_ctx = g_vpr_ctx.timing();
+   const auto& cluster_ctx = g_vpr_ctx.clustering();
+   const auto& p_runtime_ctx = placer_state_.runtime();
+
+   bool skip_anneal = false;
+#ifdef ENABLE_ANALYTIC_PLACE
+   // When the analytic placer is enabled, skip most of the annealing and go straight to quench
+   if (placer_opts_.enable_analytic_placer) {
+       skip_anneal = true;
+   }
+#endif
+
+   float sTNS = NAN;
+   float sWNS = NAN;
+
+   const t_annealing_state& annealing_state = annealer_.get_annealing_state();
+   const auto& [swap_stats, move_type_stats, placer_stats] = annealer_.get_stats();
+
+   if (!skip_anneal) {
+       //Table header
+       print_place_status_header(noc_opts_.noc);
+
+       // Outer loop of the simulated annealing begins
+       do {
+           vtr::Timer temperature_timer;
+
+           annealer_.outer_loop_update_timing_info();
+
+           if (placer_opts_.place_algorithm.is_timing_driven()) {
+               critical_path_ = timing_info_->least_slack_critical_path();
+               sTNS = timing_info_->setup_total_negative_slack();
+               sWNS = timing_info_->setup_worst_negative_slack();
+
+               // see if we should save the current placement solution as a checkpoint
+               if (placer_opts_.place_checkpointing && annealer_.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
+                   save_placement_checkpoint_if_needed(placer_state_.mutable_block_locs(),
+                                                       placement_checkpoint_,
+                                                       timing_info_, costs_, critical_path_.delay());
+               }
+           }
+
+           // do a complete inner loop iteration
+           annealer_.placement_inner_loop();
+
+           print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
+                              critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(),
+                              noc_opts_.noc, costs_.noc_cost_terms);
+
+//           sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
+//                   costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t);
+//
+//           update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info_);
+
+           //#ifdef VERBOSE
+           //            if (getEchoEnabled()) {
+           //                print_clb_placement("first_iteration_clb_placement.echo");
+           //            }
+           //#endif
+
+           // Outer loop of the simulated annealing ends
+       } while (annealer_.outer_loop_update_state());
+   } //skip_anneal ends
+
+    // Start Quench
+    annealer_.start_quench();
+
+    auto pre_quench_timing_stats = timing_ctx.stats;
+    { // Quench
+       vtr::ScopedFinishTimer temperature_timer("Placement Quench");
+
+       annealer_.outer_loop_update_timing_info();
+
+       /* Run inner loop again with temperature = 0 so as to accept only swaps
+        * which reduce the cost of the placement */
+       annealer_.placement_inner_loop();
+
+       if (placer_opts_.place_quench_algorithm.is_timing_driven()) {
+           critical_path_ = timing_info_->least_slack_critical_path();
+           sTNS = timing_info_->setup_total_negative_slack();
+           sWNS = timing_info_->setup_worst_negative_slack();
+       }
+
+       print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
+                          critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(),
+                          noc_opts_.noc, costs_.noc_cost_terms);
+    }
+    auto post_quench_timing_stats = timing_ctx.stats;
+
+    // Final timing analysis
+    PlaceCritParams crit_params;
+    crit_params.crit_exponent = annealing_state.crit_exponent;
+    crit_params.crit_limit = placer_opts_.place_crit_limit;
+
+    if (placer_opts_.place_algorithm.is_timing_driven()) {
+       perform_full_timing_update(crit_params, place_delay_model_.get(), placer_criticalities_.get(),
+                                  placer_setup_slacks_.get(), pin_timing_invalidator_.get(),
+                                  timing_info_.get(), &costs_, placer_state_);
+       VTR_LOG("post-quench CPD = %g (ns) \n",
+               1e9 * timing_info_->least_slack_critical_path().delay());
+    }
+
+    // See if our latest checkpoint is better than the current placement solution
+    if (placer_opts_.place_checkpointing) {
+       restore_best_placement(placer_state_,
+                              placement_checkpoint_, timing_info_, costs_,
+                              placer_criticalities_, placer_setup_slacks_, place_delay_model_,
+                              pin_timing_invalidator_, crit_params, noc_cost_handler_);
+    }
+
+    if (placer_opts_.placement_saves_per_temperature >= 1) {
+       std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
+                                              annealing_state.num_temps + 1, 0);
+       VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
+       print_place(nullptr, nullptr, filename.c_str(), placer_state_.mutable_block_locs());
+    }
+
+    //#ifdef VERBOSE
+    //    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {
+    //        print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));
+    //    }
+    //#endif
+
+    // Update physical pin values
+    for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
+       placer_state_.mutable_blk_loc_registry().place_sync_external_block_connections(block_id);
+    }
+
+    check_place_();
+
+
+    // Print out resource utilization, swap statistics, and move type statistics
+    print_resources_utilization(placer_state_.blk_loc_registry());
+
+    print_placement_swaps_stats(annealing_state, swap_stats);
+
+    move_type_stats.print_placement_move_types_stats();
+
+    if (noc_opts_.noc) {
+       write_noc_placement_file(noc_opts_.noc_placement_file_name, placer_state_.block_locs());
+    }
+
+    print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats);
+    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
+
+    VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
+            p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_nets_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
+}
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 57ebce2a8a2..a12fa65758b 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -24,6 +24,7 @@ class Placer {
            std::shared_ptr<PlaceDelayModel> place_delay_model,
            bool cube_bb);
 
+    void place();
 
     //TODO: make this private
   public:

From da5ebf6e3914a564db8b645367668bce19ebf966 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 12:48:04 -0500
Subject: [PATCH 05/32] add print_post_placement_stats_() to Placer

---
 vpr/src/place/place.cpp          | 123 +--------------------------
 vpr/src/place/place_log_util.cpp |  25 ++++++
 vpr/src/place/place_log_util.h   |  12 +++
 vpr/src/place/placer.cpp         | 140 +++++++++++++++++++++++++------
 vpr/src/place/placer.h           |  12 +++
 5 files changed, 166 insertions(+), 146 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8344bfd5ff9..145a73d483a 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -73,19 +73,6 @@ static void free_placement_structs();
 
 static int count_connections();
 
-static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
-                                               const t_analysis_opts& analysis_opts,
-                                               const SetupTimingInfo& timing_info,
-                                               const PlacementDelayCalculator& delay_calc,
-                                               bool is_flat,
-                                               const BlkLocRegistry& blk_loc_registry);
-
-/**
- * @brief Copies the placement location variables into the global placement context.
- * @param blk_loc_registry The placement location variables to be copied.
- */
-static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry);
-
 /*****************************************************************************/
 void try_place(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
@@ -108,13 +95,9 @@ void try_place(const Netlist<>& net_list,
      */
     VTR_ASSERT(!is_flat);
     const auto& device_ctx = g_vpr_ctx.device();
-    const auto& atom_ctx = g_vpr_ctx.atom();
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
 
-    char msg[vtr::bufsize];
-
     /* Placement delay model is independent of the placement and can be shared across
      * multiple placers. So, it is created and initialized once. */
     std::shared_ptr<PlaceDelayModel> place_delay_model;
@@ -141,8 +124,6 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-    int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
-
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     place_ctx.lock_loc_vars();
     place_ctx.compressed_block_grids = create_compressed_block_grids();
@@ -158,73 +139,11 @@ void try_place(const Netlist<>& net_list,
     const int width_fac = placer_opts.place_chan_width;
     init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry());
 
-    sprintf(msg,
-            "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
-            costs.cost, costs.bb_cost, costs.timing_cost, width_fac);
-
-    // Draw the initial placement
-    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
-
-    if (placer_opts.placement_saves_per_temperature >= 1) {
-        std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0,
-                                               0);
-        VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
-    }
-
-    //Some stats
-    VTR_LOG("\n");
-    VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
-    blocks_affected.move_abortion_logger.report_aborted_moves();
-
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        //Final timing estimate
-        VTR_ASSERT(timing_info);
-
-        critical_path = timing_info->least_slack_critical_path();
-
-        if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) {
-            tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
-                              *timing_ctx.graph, *timing_ctx.constraints,
-                              *placement_delay_calc, timing_info->analyzer());
-
-            tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
-            write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
-                                         *timing_info, debug_tnode);
-        }
-
-        generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info,
-                                           *placement_delay_calc, is_flat, blk_loc_registry);
-
-        // Print critical path delay metrics
-        VTR_LOG("\n");
-        print_setup_timing_summary(*timing_ctx.constraints,
-                                   *timing_info->setup_analyzer(), "Placement estimated ", "");
-    }
-
-    sprintf(msg,
-            "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",
-            costs.cost, costs.bb_cost, costs.timing_cost, width_fac);
-    VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs.cost,
-            costs.bb_cost, costs.timing_cost);
-    // print the noc costs info
-    if (noc_opts.noc) {
-        VTR_ASSERT(noc_cost_handler.has_value());
-        noc_cost_handler->print_noc_costs("\nNoC Placement Costs", costs, noc_opts);
-
-#ifdef ENABLE_NOC_SAT_ROUTING
-        if (costs.noc_cost_terms.congestion > 0.0) {
-            VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n");
-            invoke_sat_router(costs, noc_opts, placer_opts.seed);
-        }
-#endif //ENABLE_NOC_SAT_ROUTING
-    }
-
-    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
+    placer.place();
 
     free_placement_structs();
 
-    copy_locs_to_global_state(blk_loc_registry);
+    placer.copy_locs_to_global_state();
 }
 
 /*only count non-global connections */
@@ -269,7 +188,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     return cube_bb;
 }
 
-/* Frees the major structures needed by the placer (and not needed       *
+/* Frees the major structures needed by the placer (and not needed
  * elsewhere).   */
 static void free_placement_structs() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -295,27 +214,6 @@ void print_clb_placement(const char* fname) {
 }
 #endif
 
-static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
-                                               const t_analysis_opts& analysis_opts,
-                                               const SetupTimingInfo& timing_info,
-                                               const PlacementDelayCalculator& delay_calc,
-                                               bool is_flat,
-                                               const BlkLocRegistry& blk_loc_registry) {
-    const auto& timing_ctx = g_vpr_ctx.timing();
-    const auto& atom_ctx = g_vpr_ctx.atom();
-
-    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph,
-                                    delay_calc, is_flat, blk_loc_registry);
-    resolver.set_detail_level(analysis_opts.timing_report_detail);
-
-    tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph,
-                                          *timing_ctx.constraints);
-
-    timing_reporter.report_timing_setup(
-        placer_opts.post_place_timing_report_file,
-        *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
-}
-
 #if 0
 static void update_screen_debug();
 
@@ -327,18 +225,3 @@ static void update_screen_debug() {
 }
 #endif
 
-static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) {
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
-
-    // the placement location variables should be unlocked before being accessed
-    place_ctx.unlock_loc_vars();
-
-    // copy the local location variables into the global state
-    auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry();
-    global_blk_loc_registry = blk_loc_registry;
-
-#ifndef NO_GRAPHICS
-    // update the graphics' reference to placement location variables
-    get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry);
-#endif
-}
\ No newline at end of file
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index d935f474d0a..62fd3e57ca1 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -3,6 +3,10 @@
 #include "vtr_log.h"
 #include "annealer.h"
 #include "place_util.h"
+#include "PostClusterDelayCalculator.h"
+#include "tatum/TimingReporter.hpp"
+#include "VprTimingGraphResolver.h"
+#include "timing_info.h"
 
 void print_place_status_header(bool noc_enabled) {
     VTR_LOG("\n");
@@ -116,4 +120,25 @@ void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_st
             swap_stats.num_swap_rejected, 100 * reject_rate);
     VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
             swap_stats.num_swap_aborted, 100 * abort_rate);
+}
+
+void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
+                                        const t_analysis_opts& analysis_opts,
+                                        const SetupTimingInfo& timing_info,
+                                        const PlacementDelayCalculator& delay_calc,
+                                        bool is_flat,
+                                        const BlkLocRegistry& blk_loc_registry) {
+    const auto& timing_ctx = g_vpr_ctx.timing();
+    const auto& atom_ctx = g_vpr_ctx.atom();
+
+    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph,
+                                    delay_calc, is_flat, blk_loc_registry);
+    resolver.set_detail_level(analysis_opts.timing_report_detail);
+
+    tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph,
+                                          *timing_ctx.constraints);
+
+    timing_reporter.report_timing_setup(
+        placer_opts.post_place_timing_report_file,
+        *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
 }
\ No newline at end of file
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index 22a2bbd9b03..7e8567a97a0 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -4,8 +4,13 @@
 
 #include <cstddef>
 
+#include "timing_info_fwd.h"
+#include "PlacementDelayCalculator.h"
+
 class t_annealing_state;
 class t_placer_statistics;
+struct t_placer_opts;
+struct t_analysis_opts;
 struct NocCostTerms;
 struct t_swap_stats;
 class BlkLocRegistry;
@@ -26,4 +31,11 @@ void print_resources_utilization(const BlkLocRegistry& blk_loc_registry);
 
 void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats);
 
+void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
+                                        const t_analysis_opts& analysis_opts,
+                                        const SetupTimingInfo& timing_info,
+                                        const PlacementDelayCalculator& delay_calc,
+                                        bool is_flat,
+                                        const BlkLocRegistry& blk_loc_registry);
+
 #endif //VTR_PLACE_LOG_UTIL_H
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 03d861055c2..f17f56c8b47 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -1,7 +1,10 @@
 
 #include "placer.h"
 
+#include <utility>
+
 #include "vtr_time.h"
+#include "draw.h"
 #include "read_place.h"
 #include "analytic_placer.h"
 #include "initial_placement.h"
@@ -22,12 +25,13 @@ Placer::Placer(const Netlist<>& net_list,
                std::shared_ptr<PlaceDelayModel> place_delay_model,
                bool cube_bb)
     : placer_opts_(placer_opts)
+    , analysis_opts_(analysis_opts)
     , noc_opts_(noc_opts)
     , costs_(placer_opts.place_algorithm, noc_opts.noc)
     , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb)
     , rng_(placer_opts.seed)
     , net_cost_handler_(placer_opts, placer_state_, cube_bb)
-    , place_delay_model_(place_delay_model){
+    , place_delay_model_(std::move(place_delay_model)){
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();
@@ -59,8 +63,9 @@ Placer::Placer(const Netlist<>& net_list,
     initial_placement(placer_opts, placer_opts.constraints_file.c_str(),
                       noc_opts, blk_loc_registry, noc_cost_handler_, rng_);
 
+    const int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
     //create the move generator based on the chosen placement strategy
-//    auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_);
+    auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_);
 
     if (!placer_opts.write_initial_place_file.empty()) {
         print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state_.block_locs());
@@ -117,6 +122,16 @@ Placer::Placer(const Netlist<>& net_list,
 
    // set the starting total placement cost
    costs_.cost = costs_.get_total_cost(placer_opts, noc_opts);
+
+   // Sanity check that initial placement is legal
+   check_place_();
+
+   print_initial_placement_stats_();
+
+   annealer_ = std::make_unique<PlacementAnnealer>(placer_opts_, placer_state_, costs_, net_cost_handler_, noc_cost_handler_,
+                                                   noc_opts_, rng_, std::move(move_generator), std::move(move_generator2), place_delay_model_.get(),
+                                                   placer_criticalities_.get(), placer_setup_slacks_.get(), timing_info_.get(), pin_timing_invalidator_.get(),
+                                                   move_lim);
 }
 
 void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
@@ -178,18 +193,6 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
    }
 
    costs_.timing_cost_norm = 1 / costs_.timing_cost;
-
-   // Sanity check that initial placement is legal
-   check_place_();
-
-   print_initial_placement_stats_();
-
-#ifndef ENABLE_ANALYTIC_PLACE
-   annealer_ = std::make_unique(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
-                                noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(),
-                                placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(),
-                                move_lim);
-#endif
 }
 
 void Placer::check_place_() {
@@ -282,6 +285,20 @@ void Placer::print_initial_placement_stats_() {
            blk_loc_registry.place_macros().macros().size(), num_macro_members,
            float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
    VTR_LOG("\n");
+
+   char msg[vtr::bufsize];
+   sprintf(msg,
+           "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
+           costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width);
+
+   // Draw the initial placement
+   update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_);
+
+   if (placer_opts_.placement_saves_per_temperature >= 1) {
+       std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0);
+       VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
+       print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
+   }
 }
 
 void Placer::place() {
@@ -300,8 +317,8 @@ void Placer::place() {
    float sTNS = NAN;
    float sWNS = NAN;
 
-   const t_annealing_state& annealing_state = annealer_.get_annealing_state();
-   const auto& [swap_stats, move_type_stats, placer_stats] = annealer_.get_stats();
+   const t_annealing_state& annealing_state = annealer_->get_annealing_state();
+   const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats();
 
    if (!skip_anneal) {
        //Table header
@@ -311,7 +328,7 @@ void Placer::place() {
        do {
            vtr::Timer temperature_timer;
 
-           annealer_.outer_loop_update_timing_info();
+           annealer_->outer_loop_update_timing_info();
 
            if (placer_opts_.place_algorithm.is_timing_driven()) {
                critical_path_ = timing_info_->least_slack_critical_path();
@@ -319,7 +336,7 @@ void Placer::place() {
                sWNS = timing_info_->setup_worst_negative_slack();
 
                // see if we should save the current placement solution as a checkpoint
-               if (placer_opts_.place_checkpointing && annealer_.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
+               if (placer_opts_.place_checkpointing && annealer_->get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
                    save_placement_checkpoint_if_needed(placer_state_.mutable_block_locs(),
                                                        placement_checkpoint_,
                                                        timing_info_, costs_, critical_path_.delay());
@@ -327,10 +344,10 @@ void Placer::place() {
            }
 
            // do a complete inner loop iteration
-           annealer_.placement_inner_loop();
+           annealer_->placement_inner_loop();
 
            print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                              critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(),
+                              critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(),
                               noc_opts_.noc, costs_.noc_cost_terms);
 
 //           sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
@@ -345,21 +362,21 @@ void Placer::place() {
            //#endif
 
            // Outer loop of the simulated annealing ends
-       } while (annealer_.outer_loop_update_state());
+       } while (annealer_->outer_loop_update_state());
    } //skip_anneal ends
 
     // Start Quench
-    annealer_.start_quench();
+    annealer_->start_quench();
 
     auto pre_quench_timing_stats = timing_ctx.stats;
     { // Quench
        vtr::ScopedFinishTimer temperature_timer("Placement Quench");
 
-       annealer_.outer_loop_update_timing_info();
+       annealer_->outer_loop_update_timing_info();
 
        /* Run inner loop again with temperature = 0 so as to accept only swaps
         * which reduce the cost of the placement */
-       annealer_.placement_inner_loop();
+       annealer_->placement_inner_loop();
 
        if (placer_opts_.place_quench_algorithm.is_timing_driven()) {
            critical_path_ = timing_info_->least_slack_critical_path();
@@ -368,7 +385,7 @@ void Placer::place() {
        }
 
        print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                          critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(),
+                          critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(),
                           noc_opts_.noc, costs_.noc_cost_terms);
     }
     auto post_quench_timing_stats = timing_ctx.stats;
@@ -427,7 +444,7 @@ void Placer::place() {
     }
 
     print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats);
-    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
+//    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
 
     VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
             p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
@@ -435,3 +452,74 @@ void Placer::place() {
             p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec,
             p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
 }
+
+// Reports end-of-placement results: swap count, final timing estimate and reports (timing-driven
+// placement only), final placement costs (logged and sent to the screen), and NoC costs if enabled.
+void Placer::print_post_placement_stats_() {
+    const auto& timing_ctx = g_vpr_ctx.timing();
+    const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats();
+
+    VTR_LOG("\n");
+    VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
+//    blocks_affected.move_abortion_logger.report_aborted_moves();
+
+    if (placer_opts_.place_algorithm.is_timing_driven()) {
+       //Final timing estimate
+       VTR_ASSERT(timing_info_);
+
+       critical_path_ = timing_info_->least_slack_critical_path();
+
+       if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) {
+           tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
+                             *timing_ctx.graph, *timing_ctx.constraints,
+                             *placement_delay_calc_, timing_info_->analyzer());
+
+           tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts_.echo_dot_timing_graph_node);
+           write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
+                                        *timing_info_, debug_tnode);
+       }
+
+       generate_post_place_timing_reports(placer_opts_, analysis_opts_, *timing_info_,
+                                          *placement_delay_calc_, /*is_flat=*/false, placer_state_.blk_loc_registry());
+
+       // Print critical path delay metrics
+       VTR_LOG("\n");
+       print_setup_timing_summary(*timing_ctx.constraints, *timing_info_->setup_analyzer(), "Placement estimated ", "");
+    }
+
+    char msg[vtr::bufsize];
+    sprintf(msg,
+            "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",
+            costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width);
+    VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs_.cost, costs_.bb_cost, costs_.timing_cost);
+    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_);
+
+    // print the noc costs info
+    if (noc_opts_.noc) {
+       VTR_ASSERT(noc_cost_handler_.has_value());
+       noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", costs_, noc_opts_);
+
+#ifdef ENABLE_NOC_SAT_ROUTING
+       if (costs_.noc_cost_terms.congestion > 0.0) {
+           VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n");
+           invoke_sat_router(costs_, noc_opts_, placer_opts_.seed);
+       }
+#endif //ENABLE_NOC_SAT_ROUTING
+    }
+}
+
+void Placer::copy_locs_to_global_state() {
+    auto& place_ctx = g_vpr_ctx.mutable_placement();
+
+    // the placement location variables should be unlocked before being accessed
+    place_ctx.unlock_loc_vars();
+
+    // copy the local location variables into the global state
+    auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry();
+    global_blk_loc_registry = placer_state_.blk_loc_registry();
+
+#ifndef NO_GRAPHICS
+    // update the graphics' reference to placement location variables
+    get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry);
+#endif
+}
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index a12fa65758b..c24c045b3ca 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -13,6 +13,10 @@
 #include "noc_place_utils.h"
 #include "net_cost_handler.h"
 
+class PlacementAnnealer;
+namespace vtr{
+class ScopedStartFinishTimer;
+}
 
 class Placer {
   public:
@@ -26,9 +30,15 @@ class Placer {
 
     void place();
 
+    /**
+     * @brief Copies the placement location variables into the global placement context.
+     */
+    void copy_locs_to_global_state();
+
     //TODO: make this private
   public:
     const t_placer_opts& placer_opts_;
+    const t_analysis_opts& analysis_opts_;
     const t_noc_opts& noc_opts_;
     t_placer_costs costs_;
     PlacerState placer_state_;
@@ -70,6 +80,8 @@ class Placer {
     int check_placement_costs_();
 
     void print_initial_placement_stats_();
+
+    void print_post_placement_stats_();
 };
 
 #endif //VTR_PLACER_H

From d1d5e7fb98615d041f75bcc6aa736d80e7aba49f Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 12:51:33 -0500
Subject: [PATCH 06/32] removed unused inclusions from place.cpp

---
 vpr/src/place/place.cpp | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 145a73d483a..4030c04e216 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1,58 +1,26 @@
-#include <cstdio>
-#include <cmath>
 #include <memory>
-#include <chrono>
-#include <optional>
 
-#include "NetPinTimingInvalidator.h"
-#include "clustered_netlist.h"
-#include "device_grid.h"
-#include "verify_placement.h"
 #include "vtr_assert.h"
 #include "vtr_log.h"
-#include "vtr_util.h"
 #include "vtr_time.h"
-#include "vtr_math.h"
-
 #include "vpr_types.h"
-#include "vpr_error.h"
 #include "vpr_utils.h"
 
 #include "globals.h"
 #include "place.h"
 #include "annealer.h"
-#include "read_place.h"
 #include "draw.h"
-#include "timing_place.h"
 #include "read_xml_arch_file.h"
 #include "echo_files.h"
 #include "histogram.h"
-#include "place_util.h"
-#include "analytic_placer.h"
-#include "initial_placement.h"
 #include "place_delay_model.h"
-#include "place_timing_update.h"
-#include "move_transactions.h"
 #include "move_utils.h"
 #include "buttons.h"
 
-#include "PlacementDelayCalculator.h"
 #include "VprTimingGraphResolver.h"
-#include "timing_util.h"
-#include "timing_info.h"
-#include "concrete_timing_info.h"
-#include "tatum/echo_writer.hpp"
 #include "tatum/TimingReporter.hpp"
 
 #include "RL_agent_util.h"
-#include "place_checkpoint.h"
-
-#include "clustered_netlist_utils.h"
-
-#include "noc_place_utils.h"
-
-#include "net_cost_handler.h"
-#include "placer_state.h"
 #include "placer.h"
 
 /********************* Static subroutines local to place.c *******************/

From a3bb7628b774342d66c709aa247f2d166f6c2891 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 13:20:07 -0500
Subject: [PATCH 07/32] add PlacementLogPrinter class

---
 vpr/src/place/place.cpp          | 23 ++------------
 vpr/src/place/place_log_util.cpp | 51 ++++++++++++++++++++------------
 vpr/src/place/place_log_util.h   | 24 +++++++--------
 vpr/src/place/placer.cpp         | 22 ++++++--------
 vpr/src/place/placer.h           |  3 ++
 5 files changed, 57 insertions(+), 66 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4030c04e216..fa9f007dbb9 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -39,8 +39,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
 
 static void free_placement_structs();
 
-static int count_connections();
-
 /*****************************************************************************/
 void try_place(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
@@ -63,8 +61,8 @@ void try_place(const Netlist<>& net_list,
      */
     VTR_ASSERT(!is_flat);
     const auto& device_ctx = g_vpr_ctx.device();
-    const auto& timing_ctx = g_vpr_ctx.timing();
-    auto pre_place_timing_stats = timing_ctx.stats;
+//    const auto& timing_ctx = g_vpr_ctx.timing();
+//    auto pre_place_timing_stats = timing_ctx.stats;
 
     /* Placement delay model is independent of the placement and can be shared across
      * multiple placers. So, it is created and initialized once. */
@@ -114,23 +112,6 @@ void try_place(const Netlist<>& net_list,
     placer.copy_locs_to_global_state();
 }
 
-/*only count non-global connections */
-static int count_connections() {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    int count = 0;
-
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
-        if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-            continue;
-        }
-
-        count += cluster_ctx.clb_nlist.net_sinks(net_id).size();
-    }
-
-    return count;
-}
-
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
                        const RRGraphView& rr_graph) {
     bool cube_bb;
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 62fd3e57ca1..4f82ef6e442 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -7,8 +7,14 @@
 #include "tatum/TimingReporter.hpp"
 #include "VprTimingGraphResolver.h"
 #include "timing_info.h"
+#include "placer.h"
+
+PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
+    : placer_(placer) {}
+
+void PlacementLogPrinter::print_place_status_header() const {
+    const bool noc_enabled = placer_.noc_opts_.noc;
 
-void print_place_status_header(bool noc_enabled) {
     VTR_LOG("\n");
     if (!noc_enabled) {
         VTR_LOG(
@@ -31,28 +37,31 @@ void print_place_status_header(bool noc_enabled) {
     }
 }
 
-void print_place_status(const t_annealing_state& state,
-                        const t_placer_statistics& stats,
-                        float elapsed_sec,
-                        float cpd,
-                        float sTNS,
-                        float sWNS,
-                        size_t tot_moves,
-                        bool noc_enabled,
-                        const NocCostTerms& noc_cost_terms) {
+void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
+    const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state();
+    const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats();
+    const int tot_moves = placer_.annealer_->get_total_iteration();
+    const bool noc_enabled = placer_.noc_opts_.noc;
+    const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms;
+
+    const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven();
+    const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits<float>::quiet_NaN();
+    const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+    const float sWNS = is_timing_driven ? placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+
     VTR_LOG(
         "%4zu %6.1f %7.1e "
         "%7.3f %10.2f %-10.5g "
         "%7.3f % 10.3g % 8.3f "
         "%7.3f %7.4f %6.1f %8.2f",
-        state.num_temps, elapsed_sec, state.t,
-        stats.av_cost, stats.av_bb_cost, stats.av_timing_cost,
+        annealing_state.num_temps, elapsed_sec, annealing_state.t,
+        placer_stats.av_cost, placer_stats.av_bb_cost, placer_stats.av_timing_cost,
         1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS,
-        stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent);
+        placer_stats.success_rate, placer_stats.std_dev, annealing_state.rlim, annealing_state.crit_exponent);
 
     pretty_print_uint(" ", tot_moves, 9, 3);
 
-    VTR_LOG(" %6.3f", state.alpha);
+    VTR_LOG(" %6.3f", annealing_state.alpha);
 
     if (noc_enabled) {
         VTR_LOG(
@@ -66,10 +75,10 @@ void print_place_status(const t_annealing_state& state,
     fflush(stdout);
 }
 
-void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) {
+void PlacementLogPrinter::print_resources_utilization() const {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
-    const auto& block_locs = blk_loc_registry.block_locs();
+    const auto& block_locs = placer_.placer_state_.block_locs();
 
     size_t max_block_name = 0;
     size_t max_tile_name = 0;
@@ -103,7 +112,10 @@ void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) {
     VTR_LOG("\n");
 }
 
-void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) {
+void PlacementLogPrinter::print_placement_swaps_stats() const {
+    const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats();
+    const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state();
+
     size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted;
     VTR_ASSERT(total_swap_attempts > 0);
 
@@ -111,7 +123,7 @@ void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_st
     float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts;
     float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts;
     float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts;
-    VTR_LOG("Placement number of temperatures: %d\n", state.num_temps);
+    VTR_LOG("Placement number of temperatures: %d\n", annealing_state.num_temps);
     VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits,
             total_swap_attempts);
     VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits,
@@ -141,4 +153,5 @@ void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
     timing_reporter.report_timing_setup(
         placer_opts.post_place_timing_report_file,
         *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
-}
\ No newline at end of file
+}
+
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index 7e8567a97a0..f6bb64b8f9d 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -14,22 +14,20 @@ struct t_analysis_opts;
 struct NocCostTerms;
 struct t_swap_stats;
 class BlkLocRegistry;
+class Placer;
 
-void print_place_status_header(bool noc_enabled);
+class PlacementLogPrinter {
+  public:
+    explicit PlacementLogPrinter(const Placer& placer);
 
-void print_place_status(const t_annealing_state& state,
-                        const t_placer_statistics& stats,
-                        float elapsed_sec,
-                        float cpd,
-                        float sTNS,
-                        float sWNS,
-                        size_t tot_moves,
-                        bool noc_enabled,
-                        const NocCostTerms& noc_cost_terms);
+    void print_place_status_header() const;
+    void print_resources_utilization() const;
+    void print_placement_swaps_stats() const;
+    void print_place_status(float elapsed_sec) const;
 
-void print_resources_utilization(const BlkLocRegistry& blk_loc_registry);
-
-void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats);
+  private:
+    const Placer& placer_;
+};
 
 void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                         const t_analysis_opts& analysis_opts,
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index f17f56c8b47..76aa214ded3 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -14,7 +14,6 @@
 #include "place_timing_update.h"
 #include "annealer.h"
 #include "RL_agent_util.h"
-#include "place_log_util.h"
 #include "place_checkpoint.h"
 
 Placer::Placer(const Netlist<>& net_list,
@@ -31,7 +30,8 @@ Placer::Placer(const Netlist<>& net_list,
     , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb)
     , rng_(placer_opts.seed)
     , net_cost_handler_(placer_opts, placer_state_, cube_bb)
-    , place_delay_model_(std::move(place_delay_model)){
+    , place_delay_model_(std::move(place_delay_model))
+    , log_printer_(*this) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();
@@ -322,7 +322,7 @@ void Placer::place() {
 
    if (!skip_anneal) {
        //Table header
-       print_place_status_header(noc_opts_.noc);
+       log_printer_.print_place_status_header();
 
        // Outer loop of the simulated annealing begins
        do {
@@ -346,9 +346,7 @@ void Placer::place() {
            // do a complete inner loop iteration
            annealer_->placement_inner_loop();
 
-           print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                              critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(),
-                              noc_opts_.noc, costs_.noc_cost_terms);
+           log_printer_.print_place_status(temperature_timer.elapsed_sec());
 
 //           sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
 //                   costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t);
@@ -384,9 +382,7 @@ void Placer::place() {
            sWNS = timing_info_->setup_worst_negative_slack();
        }
 
-       print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
-                          critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(),
-                          noc_opts_.noc, costs_.noc_cost_terms);
+       log_printer_.print_place_status(temperature_timer.elapsed_sec());
     }
     auto post_quench_timing_stats = timing_ctx.stats;
 
@@ -431,11 +427,11 @@ void Placer::place() {
 
     check_place_();
 
+    print_post_placement_stats_();
 
-    // Print out swap statistics
-    print_resources_utilization(placer_state_.blk_loc_registry());
-
-    print_placement_swaps_stats(annealing_state, swap_stats);
+    // Print out swap statistics and resource utilization
+    log_printer_.print_resources_utilization();
+    log_printer_.print_placement_swaps_stats();
 
     move_type_stats.print_placement_move_types_stats();
 
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index c24c045b3ca..96010151149 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -12,6 +12,7 @@
 #include "placer_state.h"
 #include "noc_place_utils.h"
 #include "net_cost_handler.h"
+#include "place_log_util.h"
 
 class PlacementAnnealer;
 namespace vtr{
@@ -64,6 +65,8 @@ class Placer {
 
     std::unique_ptr<PlacementAnnealer> annealer_;
 
+    PlacementLogPrinter log_printer_;
+
   private:
     void alloc_and_init_timing_objects_(const Netlist<>& net_list,
                                         const t_analysis_opts& analysis_opts);

From 1e10c27565fbab84b6c0e05c3a991fe967bce638 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 13:22:21 -0500
Subject: [PATCH 08/32] remove unused sTNS and sWNS in Placer

---
 vpr/src/place/placer.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 76aa214ded3..9b6a6f62c61 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -314,9 +314,6 @@ void Placer::place() {
    }
 #endif
 
-   float sTNS = NAN;
-   float sWNS = NAN;
-
    const t_annealing_state& annealing_state = annealer_->get_annealing_state();
    const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats();
 
@@ -332,8 +329,6 @@ void Placer::place() {
 
            if (placer_opts_.place_algorithm.is_timing_driven()) {
                critical_path_ = timing_info_->least_slack_critical_path();
-               sTNS = timing_info_->setup_total_negative_slack();
-               sWNS = timing_info_->setup_worst_negative_slack();
 
                // see if we should save the current placement solution as a checkpoint
                if (placer_opts_.place_checkpointing && annealer_->get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
@@ -378,8 +373,6 @@ void Placer::place() {
 
        if (placer_opts_.place_quench_algorithm.is_timing_driven()) {
            critical_path_ = timing_info_->least_slack_critical_path();
-           sTNS = timing_info_->setup_total_negative_slack();
-           sWNS = timing_info_->setup_worst_negative_slack();
        }
 
        log_printer_.print_place_status(temperature_timer.elapsed_sec());

From 34deb5c05024a44a4884552a876400b429e95aed Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 15:10:32 -0500
Subject: [PATCH 09/32] make member variables of Placer private

---
 vpr/src/place/place.cpp          |  6 +++---
 vpr/src/place/place_log_util.cpp | 29 ++++++++++++++++-------------
 vpr/src/place/placer.cpp         | 29 +++++++++++++++++++++++++++++
 vpr/src/place/placer.h           | 21 ++++++++++++++++++++-
 4 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index fa9f007dbb9..ba86e91020b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -97,13 +97,13 @@ void try_place(const Netlist<>& net_list,
     Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb);
 
 #ifndef NO_GRAPHICS
-    if (placer.noc_cost_handler_.has_value()) {
-        get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler_->get_link_bandwidth_usages());
+    if (placer.noc_cost_handler().has_value()) {
+        get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler()->get_link_bandwidth_usages());
     }
 #endif
 
     const int width_fac = placer_opts.place_chan_width;
-    init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry());
+    init_draw_coords((float)width_fac, placer.placer_state().blk_loc_registry());
 
     placer.place();
 
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 4f82ef6e442..4a5e90a2c42 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -13,7 +13,7 @@ PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
     : placer_(placer) {}
 
 void PlacementLogPrinter::print_place_status_header() const {
-    const bool noc_enabled = placer_.noc_opts_.noc;
+    const bool noc_enabled = placer_.noc_opts().noc;
 
     VTR_LOG("\n");
     if (!noc_enabled) {
@@ -38,16 +38,18 @@ void PlacementLogPrinter::print_place_status_header() const {
 }
 
 void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
-    const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state();
-    const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats();
-    const int tot_moves = placer_.annealer_->get_total_iteration();
-    const bool noc_enabled = placer_.noc_opts_.noc;
-    const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms;
+    const PlacementAnnealer& annealer = placer_.annealer();
+    const t_annealing_state& annealing_state = annealer.get_annealing_state();
+    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
+    const int tot_moves = annealer.get_total_iteration();
 
-    const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven();
-    const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits<float>::quiet_NaN();
-    const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN();
-    const float sWNS = is_timing_driven ? placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+    const bool noc_enabled = placer_.noc_opts().noc;
+    const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms;
+
+    const bool is_timing_driven = placer_.placer_opts().place_algorithm.is_timing_driven();
+    const float cpd = is_timing_driven ? placer_.critical_path().delay() : std::numeric_limits<float>::quiet_NaN();
+    const float sTNS = is_timing_driven ? placer_.timing_info()->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+    const float sWNS = is_timing_driven ? placer_.timing_info()->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN();
 
     VTR_LOG(
         "%4zu %6.1f %7.1e "
@@ -78,7 +80,7 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
 void PlacementLogPrinter::print_resources_utilization() const {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
-    const auto& block_locs = placer_.placer_state_.block_locs();
+    const auto& block_locs = placer_.placer_state().block_locs();
 
     size_t max_block_name = 0;
     size_t max_tile_name = 0;
@@ -113,8 +115,9 @@ void PlacementLogPrinter::print_resources_utilization() const {
 }
 
 void PlacementLogPrinter::print_placement_swaps_stats() const {
-    const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats();
-    const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state();
+    const PlacementAnnealer& annealer = placer_.annealer();
+    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
+    const t_annealing_state& annealing_state = annealer.get_annealing_state();
 
     size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted;
     VTR_ASSERT(total_swap_attempts > 0);
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 9b6a6f62c61..ec78ce47fa8 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -512,3 +512,32 @@ void Placer::copy_locs_to_global_state() {
     get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry);
 #endif
 }
+
+const PlacementAnnealer& Placer::annealer() const {
+    return *annealer_;
+}
+
+const t_placer_opts& Placer::placer_opts() const {
+    return placer_opts_;
+}
+
+const t_noc_opts& Placer::noc_opts() const {
+    return noc_opts_;
+}
+
+const t_placer_costs& Placer::costs() const {
+    return costs_;
+}
+
+const tatum::TimingPathInfo& Placer::critical_path() const {
+    return critical_path_;
+}
+std::shared_ptr<const SetupTimingInfo> Placer::timing_info() const {
+    return timing_info_;
+}
+const PlacerState& Placer::placer_state() const {
+    return placer_state_;
+}
+const std::optional<NocCostHandler>& Placer::noc_cost_handler() const {
+    return noc_cost_handler_;
+}
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 96010151149..ec0aaaeb44c 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -36,8 +36,27 @@ class Placer {
      */
     void copy_locs_to_global_state();
 
+    /*
+     * Getters
+     */
+    const PlacementAnnealer& annealer() const;
+
+    const t_placer_opts& placer_opts() const;
+
+    const t_noc_opts& noc_opts() const;
+
+    const t_placer_costs& costs() const;
+
+    const tatum::TimingPathInfo& critical_path() const;
+
+    std::shared_ptr<const SetupTimingInfo> timing_info() const;
+
+    const PlacerState& placer_state() const;
+
+    const std::optional<NocCostHandler>& noc_cost_handler() const;
+
     //TODO: make this private
-  public:
+  private:
     const t_placer_opts& placer_opts_;
     const t_analysis_opts& analysis_opts_;
     const t_noc_opts& noc_opts_;

From ce0fb18897385bf988e79b7f51a5388d5de13138 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 15:30:45 -0500
Subject: [PATCH 10/32] add print_initial_placement_stats() to
 PlacementLogPrinter

---
 vpr/src/draw/draw.cpp            |  2 +-
 vpr/src/draw/draw.h              |  2 +-
 vpr/src/place/place_log_util.cpp | 55 ++++++++++++++++++++++++++++++++
 vpr/src/place/place_log_util.h   |  1 +
 vpr/src/place/placer.cpp         | 48 +---------------------------
 vpr/src/place/placer.h           |  2 --
 6 files changed, 59 insertions(+), 51 deletions(-)

diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp
index 546bc8b55f3..46bdd750ca9 100644
--- a/vpr/src/draw/draw.cpp
+++ b/vpr/src/draw/draw.cpp
@@ -367,7 +367,7 @@ static void initial_setup_NO_PICTURE_to_ROUTING_with_crit_path(
 }
 #endif //NO_GRAPHICS
 
-void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<SetupTimingInfo> setup_timing_info) {
+void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<const SetupTimingInfo> setup_timing_info) {
 #ifndef NO_GRAPHICS
 
     /* Updates the screen if the user has requested graphics.  The priority  *
diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h
index 2bbd17d077f..355b2891931 100644
--- a/vpr/src/draw/draw.h
+++ b/vpr/src/draw/draw.h
@@ -42,7 +42,7 @@ extern ezgl::application application;
 
 #endif /* NO_GRAPHICS */
 
-void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<SetupTimingInfo> timing_info);
+void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<const SetupTimingInfo> timing_info);
 
 //FIXME: Currently broken if no rr-graph is loaded
 /**
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 4a5e90a2c42..5e375aa1f35 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -8,6 +8,8 @@
 #include "VprTimingGraphResolver.h"
 #include "timing_info.h"
 #include "placer.h"
+#include "draw.h"
+#include "read_place.h"
 
 PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
     : placer_(placer) {}
@@ -136,6 +138,59 @@ void PlacementLogPrinter::print_placement_swaps_stats() const {
     VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
             swap_stats.num_swap_aborted, 100 * abort_rate);
 }
+void PlacementLogPrinter::print_initial_placement_stats() const {
+    const t_placer_costs& costs = placer_.costs();
+    const t_noc_opts& noc_opts = placer_.noc_opts();
+    const t_placer_opts& placer_opts = placer_.placer_opts();
+    const tatum::TimingPathInfo& critical_path = placer_.critical_path();
+    const std::optional<NocCostHandler>& noc_cost_handler = placer_.noc_cost_handler();
+    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info();
+    const PlacerState& placer_state = placer_.placer_state();
+
+    VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n",
+        costs.cost, costs.bb_cost, costs.timing_cost);
+
+    if (noc_opts.noc) {
+        VTR_ASSERT(noc_cost_handler.has_value());
+        noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts);
+    }
+
+    if (placer_opts.place_algorithm.is_timing_driven()) {
+        VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n",
+                1e9 * critical_path.delay());
+        VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n",
+                1e9 * timing_info->setup_total_negative_slack());
+        VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n",
+                1e9 * timing_info->setup_worst_negative_slack());
+        VTR_LOG("\n");
+        VTR_LOG("Initial placement estimated setup slack histogram:\n");
+        print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer()));
+    }
+
+    const BlkLocRegistry& blk_loc_registry = placer_state.blk_loc_registry();
+    size_t num_macro_members = 0;
+    for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) {
+        num_macro_members += macro.members.size();
+    }
+    VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n",
+            blk_loc_registry.place_macros().macros().size(), num_macro_members,
+            float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
+    VTR_LOG("\n");
+
+    char msg[vtr::bufsize];
+    sprintf(msg,
+            "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
+            costs.cost, costs.bb_cost, costs.timing_cost, placer_opts.place_chan_width);
+
+    // Draw the initial placement
+    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
+
+    if (placer_opts.placement_saves_per_temperature >= 1) {
+        std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0);
+        VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
+        print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
+    }
+}
 
 void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                         const t_analysis_opts& analysis_opts,
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index f6bb64b8f9d..7edc7b5b190 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -24,6 +24,7 @@ class PlacementLogPrinter {
     void print_resources_utilization() const;
     void print_placement_swaps_stats() const;
     void print_place_status(float elapsed_sec) const;
+    void print_initial_placement_stats() const;
 
   private:
     const Placer& placer_;
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index ec78ce47fa8..f8f5e92f7af 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -126,7 +126,7 @@ Placer::Placer(const Netlist<>& net_list,
    // Sanity check that initial placement is legal
    check_place_();
 
-   print_initial_placement_stats_();
+   log_printer_.print_initial_placement_stats();
 
    annealer_ = std::make_unique<PlacementAnnealer>(placer_opts_, placer_state_, costs_, net_cost_handler_, noc_cost_handler_,
                                                    noc_opts_, rng_, std::move(move_generator), std::move(move_generator2), place_delay_model_.get(),
@@ -255,52 +255,6 @@ int Placer::check_placement_costs_() {
    return error;
 }
 
-void Placer::print_initial_placement_stats_() {
-   VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n",
-           costs_.cost, costs_.bb_cost, costs_.timing_cost);
-
-   if (noc_opts_.noc) {
-       VTR_ASSERT(noc_cost_handler_.has_value());
-       noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs_, noc_opts_);
-   }
-
-   if (placer_opts_.place_algorithm.is_timing_driven()) {
-       VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n",
-               1e9 * critical_path_.delay());
-       VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n",
-               1e9 * timing_info_->setup_total_negative_slack());
-       VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n",
-               1e9 * timing_info_->setup_worst_negative_slack());
-       VTR_LOG("\n");
-       VTR_LOG("Initial placement estimated setup slack histogram:\n");
-       print_histogram(create_setup_slack_histogram(*timing_info_->setup_analyzer()));
-   }
-
-   const BlkLocRegistry& blk_loc_registry = placer_state_.blk_loc_registry();
-   size_t num_macro_members = 0;
-   for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) {
-       num_macro_members += macro.members.size();
-   }
-   VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n",
-           blk_loc_registry.place_macros().macros().size(), num_macro_members,
-           float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
-   VTR_LOG("\n");
-
-   char msg[vtr::bufsize];
-   sprintf(msg,
-           "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
-           costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width);
-
-   // Draw the initial placement
-   update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_);
-
-   if (placer_opts_.placement_saves_per_temperature >= 1) {
-       std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0);
-       VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
-       print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
-   }
-}
-
 void Placer::place() {
    const auto& timing_ctx = g_vpr_ctx.timing();
    const auto& cluster_ctx = g_vpr_ctx.clustering();
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index ec0aaaeb44c..1fb706ace98 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -101,8 +101,6 @@ class Placer {
 
     int check_placement_costs_();
 
-    void print_initial_placement_stats_();
-
     void print_post_placement_stats_();
 };
 

From f2fab3846917735fa490283e6f1be3b11c5dd9e2 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 15:39:41 -0500
Subject: [PATCH 11/32] add msg_ member variable to PlacementLogPrinter

---
 vpr/src/place/place_log_util.cpp | 15 +++++++++++----
 vpr/src/place/place_log_util.h   |  2 ++
 vpr/src/place/placer.cpp         |  5 -----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 5e375aa1f35..c64e425a17a 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -12,7 +12,8 @@
 #include "read_place.h"
 
 PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
-    : placer_(placer) {}
+    : placer_(placer)
+    , msg_(vtr::bufsize) {}
 
 void PlacementLogPrinter::print_place_status_header() const {
     const bool noc_enabled = placer_.noc_opts().noc;
@@ -44,6 +45,8 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
     const int tot_moves = annealer.get_total_iteration();
+    const t_placer_costs& costs = placer_.costs();
+    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info();
 
     const bool noc_enabled = placer_.noc_opts().noc;
     const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms;
@@ -77,6 +80,11 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
 
     VTR_LOG("\n");
     fflush(stdout);
+
+   sprintf(msg_.data(), "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
+           costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t);
+
+   update_screen(ScreenUpdatePriority::MINOR, msg_.data(), PLACEMENT, timing_info);
 }
 
 void PlacementLogPrinter::print_resources_utilization() const {
@@ -177,13 +185,12 @@ void PlacementLogPrinter::print_initial_placement_stats() const {
             float(num_macro_members) / blk_loc_registry.place_macros().macros().size());
     VTR_LOG("\n");
 
-    char msg[vtr::bufsize];
-    sprintf(msg,
+    sprintf(msg_.data(),
             "Initial Placement.  Cost: %g  BB Cost: %g  TD Cost %g \t Channel Factor: %d",
             costs.cost, costs.bb_cost, costs.timing_cost, placer_opts.place_chan_width);
 
     // Draw the initial placement
-    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
+    update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, timing_info);
 
     if (placer_opts.placement_saves_per_temperature >= 1) {
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0);
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index 7edc7b5b190..e33d5290953 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -3,6 +3,7 @@
 #define VTR_PLACE_LOG_UTIL_H
 
 #include <cstddef>
+#include <vector>
 
 #include "timing_info_fwd.h"
 #include "PlacementDelayCalculator.h"
@@ -28,6 +29,7 @@ class PlacementLogPrinter {
 
   private:
     const Placer& placer_;
+    mutable std::vector<char> msg_;
 };
 
 void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index f8f5e92f7af..4d1d1c1c4e9 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -297,11 +297,6 @@ void Placer::place() {
 
            log_printer_.print_place_status(temperature_timer.elapsed_sec());
 
-//           sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
-//                   costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t);
-//
-//           update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info_);
-
            //#ifdef VERBOSE
            //            if (getEchoEnabled()) {
            //                print_clb_placement("first_iteration_clb_placement.echo");

From efe43e96cd186dba5c4c377c1c0e3fe0dd239575 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 16:25:30 -0500
Subject: [PATCH 12/32] add print_post_placement_stats() to PlacementLogPrinter

---
 vpr/src/place/place_log_util.cpp | 56 ++++++++++++++++++++++
 vpr/src/place/place_log_util.h   |  1 +
 vpr/src/place/placer.cpp         | 80 ++++++--------------------------
 vpr/src/place/placer.h           |  6 +--
 vpr/src/timing/timing_util.cpp   |  4 +-
 vpr/src/timing/timing_util.h     |  2 +-
 6 files changed, 77 insertions(+), 72 deletions(-)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index c64e425a17a..c9c26cece20 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -10,6 +10,7 @@
 #include "placer.h"
 #include "draw.h"
 #include "read_place.h"
+#include "tatum/echo_writer.hpp"
 
 PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
     : placer_(placer)
@@ -199,6 +200,61 @@ void PlacementLogPrinter::print_initial_placement_stats() const {
     }
 }
 
+void PlacementLogPrinter::print_post_placement_stats() const {
+    const auto& timing_ctx = g_vpr_ctx.timing();
+    const PlacementAnnealer& annealer = placer_.annealer();
+    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
+
+    VTR_LOG("\n");
+    VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
+    //    blocks_affected.move_abortion_logger.report_aborted_moves();
+
+    if (placer_.placer_opts_.place_algorithm.is_timing_driven()) {
+        //Final timing estimate
+        VTR_ASSERT(placer_.timing_info_);
+
+        if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) {
+            tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
+                              *timing_ctx.graph, *timing_ctx.constraints,
+                              *placer_.placement_delay_calc_, placer_.timing_info_->analyzer());
+
+            tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(placer_.analysis_opts_.echo_dot_timing_graph_node);
+            write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
+                                         *placer_.timing_info_, debug_tnode);
+        }
+
+        generate_post_place_timing_reports(placer_.placer_opts_, placer_.analysis_opts_, *placer_.timing_info_,
+                                           *placer_.placement_delay_calc_, /*is_flat=*/false, placer_.placer_state_.blk_loc_registry());
+
+        // Print critical path delay metrics
+        VTR_LOG("\n");
+        print_setup_timing_summary(*timing_ctx.constraints,
+                                   *placer_.timing_info_->setup_analyzer(), "Placement estimated ", "");
+    }
+
+    char msg[vtr::bufsize];
+    sprintf(msg,
+            "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",
+            placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost, placer_.placer_opts_.place_chan_width);
+    VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", placer_.costs_.cost,
+            placer_.costs_.bb_cost, placer_.costs_.timing_cost);
+    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, placer_.timing_info_);
+
+    // print the noc costs info
+    if (placer_.noc_opts_.noc) {
+        VTR_ASSERT(placer_.noc_cost_handler_.has_value());
+        placer_.noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", placer_.costs_, placer_.noc_opts_);
+
+        // TODO: move this to an appropriate file
+#ifdef ENABLE_NOC_SAT_ROUTING
+        if (costs.noc_cost_terms.congestion > 0.0) {
+            VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n");
+            invoke_sat_router(costs, noc_opts, placer_opts.seed);
+        }
+#endif //ENABLE_NOC_SAT_ROUTING
+    }
+}
+
 void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                         const t_analysis_opts& analysis_opts,
                                         const SetupTimingInfo& timing_info,
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index e33d5290953..41511ae1dd4 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -26,6 +26,7 @@ class PlacementLogPrinter {
     void print_placement_swaps_stats() const;
     void print_place_status(float elapsed_sec) const;
     void print_initial_placement_stats() const;
+    void print_post_placement_stats() const;
 
   private:
     const Placer& placer_;
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 4d1d1c1c4e9..ec2cc7a9551 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -9,12 +9,12 @@
 #include "analytic_placer.h"
 #include "initial_placement.h"
 #include "concrete_timing_info.h"
-#include "tatum/echo_writer.hpp"
 #include "verify_placement.h"
 #include "place_timing_update.h"
 #include "annealer.h"
 #include "RL_agent_util.h"
 #include "place_checkpoint.h"
+#include "tatum/echo_writer.hpp"
 
 Placer::Placer(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
@@ -179,17 +179,14 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
 
    // Write out the initial timing echo file
    if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) {
-       tatum::write_echo(
-           getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH),
-           *timing_ctx.graph, *timing_ctx.constraints,
-           *placement_delay_calc_, timing_info_->analyzer());
+       tatum::write_echo(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH),
+                         *timing_ctx.graph, *timing_ctx.constraints,
+                         *placement_delay_calc_, timing_info_->analyzer());
 
        tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
 
-       write_setup_timing_graph_dot(
-           getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)
-               + std::string(".dot"),
-           *timing_info_, debug_tnode);
+       write_setup_timing_graph_dot(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
+                                    *timing_info_, debug_tnode);
    }
 
    costs_.timing_cost_norm = 1 / costs_.timing_cost;
@@ -337,8 +334,11 @@ void Placer::place() {
        perform_full_timing_update(crit_params, place_delay_model_.get(), placer_criticalities_.get(),
                                   placer_setup_slacks_.get(), pin_timing_invalidator_.get(),
                                   timing_info_.get(), &costs_, placer_state_);
+
+       critical_path_ = timing_info_->least_slack_critical_path();
+
        VTR_LOG("post-quench CPD = %g (ns) \n",
-               1e9 * timing_info_->least_slack_critical_path().delay());
+               1e9 * critical_path_.delay());
     }
 
     // See if our latest checkpoint is better than the current placement solution
@@ -369,7 +369,7 @@ void Placer::place() {
 
     check_place_();
 
-    print_post_placement_stats_();
+    log_printer_.print_post_placement_stats();
 
     // Print out swap statistics and resource utilization
     log_printer_.print_resources_utilization();
@@ -391,61 +391,6 @@ void Placer::place() {
             p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
 }
 
-void Placer::print_post_placement_stats_() {
-    const auto& timing_ctx = g_vpr_ctx.timing();
-    const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats();
-
-    VTR_LOG("\n");
-    VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
-//    blocks_affected.move_abortion_logger.report_aborted_moves();
-
-    if (placer_opts_.place_algorithm.is_timing_driven()) {
-       //Final timing estimate
-       VTR_ASSERT(timing_info_);
-
-       critical_path_ = timing_info_->least_slack_critical_path();
-
-       if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) {
-           tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH),
-                             *timing_ctx.graph, *timing_ctx.constraints,
-                             *placement_delay_calc_, timing_info_->analyzer());
-
-           tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts_.echo_dot_timing_graph_node);
-           write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
-                                        *timing_info_, debug_tnode);
-       }
-
-       generate_post_place_timing_reports(placer_opts_, analysis_opts_, *timing_info_,
-                                          *placement_delay_calc_, /*is_flat=*/false, placer_state_.blk_loc_registry());
-
-       // Print critical path delay metrics
-       VTR_LOG("\n");
-       print_setup_timing_summary(*timing_ctx.constraints,
-                                  *timing_info_->setup_analyzer(), "Placement estimated ", "");
-    }
-
-    char msg[vtr::bufsize];
-    sprintf(msg,
-            "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",
-            costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width);
-    VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs_.cost,
-            costs_.bb_cost, costs_.timing_cost);
-    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_);
-
-    // print the noc costs info
-    if (noc_opts_.noc) {
-       VTR_ASSERT(noc_cost_handler_.has_value());
-       noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", costs_, noc_opts_);
-
-#ifdef ENABLE_NOC_SAT_ROUTING
-       if (costs.noc_cost_terms.congestion > 0.0) {
-           VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n");
-           invoke_sat_router(costs, noc_opts, placer_opts.seed);
-       }
-#endif //ENABLE_NOC_SAT_ROUTING
-    }
-}
-
 void Placer::copy_locs_to_global_state() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
 
@@ -481,12 +426,15 @@ const t_placer_costs& Placer::costs() const {
 const tatum::TimingPathInfo& Placer::critical_path() const {
     return critical_path_;
 }
+
 std::shared_ptr<const SetupTimingInfo> Placer::timing_info() const {
     return timing_info_;
 }
+
 const PlacerState& Placer::placer_state() const {
     return placer_state_;
 }
+
 const std::optional<NocCostHandler>& Placer::noc_cost_handler() const {
     return noc_cost_handler_;
 }
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 1fb706ace98..ea6643bb7f5 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -84,7 +84,9 @@ class Placer {
 
     std::unique_ptr<PlacementAnnealer> annealer_;
 
-    PlacementLogPrinter log_printer_;
+    const PlacementLogPrinter log_printer_;
+
+    friend void PlacementLogPrinter::print_post_placement_stats() const;
 
   private:
     void alloc_and_init_timing_objects_(const Netlist<>& net_list,
@@ -100,8 +102,6 @@ class Placer {
     void check_place_();
 
     int check_placement_costs_();
-
-    void print_post_placement_stats_();
 };
 
 #endif //VTR_PLACER_H
diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp
index a210c0dbdcd..a0de3038fde 100644
--- a/vpr/src/timing/timing_util.cpp
+++ b/vpr/src/timing/timing_util.cpp
@@ -47,7 +47,7 @@ tatum::TimingPathInfo find_least_slack_critical_path_delay(const tatum::TimingCo
 
     auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer);
 
-    //Record the maximum critical path accross all domain pairs
+    //Record the maximum critical path across all domain pairs
     for (const auto& path_info : cpds) {
         if (path_info.slack() < crit_path_info.slack() || std::isnan(crit_path_info.slack())) {
             crit_path_info = path_info;
@@ -855,7 +855,7 @@ tatum::NodeId pin_name_to_tnode(std::string pin_name) {
     return tnode;
 }
 
-void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node) {
+void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) {
     auto& timing_graph = *timing_info.timing_graph();
 
     auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator());
diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h
index e4d45c84213..0a04a132f44 100644
--- a/vpr/src/timing/timing_util.h
+++ b/vpr/src/timing/timing_util.h
@@ -119,7 +119,7 @@ void print_tatum_cpds(std::vector<tatum::TimingPathInfo> cpds);
 tatum::NodeId id_or_pin_name_to_tnode(std::string name_or_id);
 tatum::NodeId pin_name_to_tnode(std::string name);
 
-void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
+void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
 void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
 
 struct TimingStats {

From 8b7780ea4d82fef6603646bb090fd0a05bccf161 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 16:32:46 -0500
Subject: [PATCH 13/32] call get_move_abortion_logger() in
 print_post_placement_stats()

---
 vpr/src/place/annealer.cpp       | 6 +++++-
 vpr/src/place/annealer.h         | 7 +++++++
 vpr/src/place/place_log_util.cpp | 2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 56f419477e2..1beb0e336c7 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -778,6 +778,10 @@ std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&>
     return {swap_stats_, move_type_stats_, placer_stats_};
 }
 
+const MoveAbortionLogger& PlacementAnnealer::get_move_abortion_logger() const {
+    return blocks_affected_.move_abortion_logger;
+}
+
 void PlacementAnnealer::LOG_MOVE_STATS_HEADER() {
     if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
         if (move_stats_file_) {
@@ -857,4 +861,4 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) {
     }
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n");
     return e_move_result::REJECTED;
-}
\ No newline at end of file
+}
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 039ecfb652f..d8e9939cb27 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -210,6 +210,13 @@ class PlacementAnnealer {
     /// @brief Returns constant references to different statistics objects
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
 
+    /**
+     * @brief Returns MoveAbortionLogger to report how many moves
+     * were aborted for each reason.
+     * @return A constant reference to a MoveAbortionLogger object.
+     */
+    const MoveAbortionLogger& get_move_abortion_logger() const;
+
   private:
     /**
      * @brief Determines whether a move should be accepted or not.
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index c9c26cece20..40598a67a0f 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -207,7 +207,7 @@ void PlacementLogPrinter::print_post_placement_stats() const {
 
     VTR_LOG("\n");
     VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
-    //    blocks_affected.move_abortion_logger.report_aborted_moves();
+    annealer.get_move_abortion_logger().report_aborted_moves();
 
     if (placer_.placer_opts_.place_algorithm.is_timing_driven()) {
         //Final timing estimate

From a2da0eeac6b4978b5adee8b8cad61293fbbb5c5f Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 16:33:59 -0500
Subject: [PATCH 14/32] use msg member variable instead of msg local variable

---
 vpr/src/place/place_log_util.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 40598a67a0f..0f2b88ccfcf 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -232,13 +232,12 @@ void PlacementLogPrinter::print_post_placement_stats() const {
                                    *placer_.timing_info_->setup_analyzer(), "Placement estimated ", "");
     }
 
-    char msg[vtr::bufsize];
-    sprintf(msg,
+    sprintf(msg_.data(),
             "Placement. Cost: %g  bb_cost: %g td_cost: %g Channel Factor: %d",
             placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost, placer_.placer_opts_.place_chan_width);
     VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", placer_.costs_.cost,
             placer_.costs_.bb_cost, placer_.costs_.timing_cost);
-    update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, placer_.timing_info_);
+    update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, placer_.timing_info_);
 
     // print the noc costs info
     if (placer_.noc_opts_.noc) {

From a632425b98f8030cee736e94f8fea5303ead0b7c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 16:38:07 -0500
Subject: [PATCH 15/32] add quiet mode to PlacementLogPrinter

---
 vpr/src/place/place_log_util.cpp | 29 +++++++++++++++++++++++++++--
 vpr/src/place/place_log_util.h   |  3 ++-
 vpr/src/place/placer.cpp         |  2 +-
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 0f2b88ccfcf..37e16fd4cd6 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -12,11 +12,16 @@
 #include "read_place.h"
 #include "tatum/echo_writer.hpp"
 
-PlacementLogPrinter::PlacementLogPrinter(const Placer& placer)
+PlacementLogPrinter::PlacementLogPrinter(const Placer& placer, bool quiet)
     : placer_(placer)
-    , msg_(vtr::bufsize) {}
+    , quiet_(quiet)
+    , msg_(quiet ? 0 : vtr::bufsize) {}
 
 void PlacementLogPrinter::print_place_status_header() const {
+    if (quiet_) {
+        return;
+    }
+
     const bool noc_enabled = placer_.noc_opts().noc;
 
     VTR_LOG("\n");
@@ -42,6 +47,10 @@ void PlacementLogPrinter::print_place_status_header() const {
 }
 
 void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
+    if (quiet_) {
+        return;
+    }
+
     const PlacementAnnealer& annealer = placer_.annealer();
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
@@ -89,6 +98,10 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
 }
 
 void PlacementLogPrinter::print_resources_utilization() const {
+    if (quiet_) {
+        return;
+    }
+
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& block_locs = placer_.placer_state().block_locs();
@@ -126,6 +139,10 @@ void PlacementLogPrinter::print_resources_utilization() const {
 }
 
 void PlacementLogPrinter::print_placement_swaps_stats() const {
+    if (quiet_) {
+        return;
+    }
+
     const PlacementAnnealer& annealer = placer_.annealer();
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
@@ -148,6 +165,10 @@ void PlacementLogPrinter::print_placement_swaps_stats() const {
             swap_stats.num_swap_aborted, 100 * abort_rate);
 }
 void PlacementLogPrinter::print_initial_placement_stats() const {
+    if (quiet_) {
+        return;
+    }
+
     const t_placer_costs& costs = placer_.costs();
     const t_noc_opts& noc_opts = placer_.noc_opts();
     const t_placer_opts& placer_opts = placer_.placer_opts();
@@ -201,6 +222,10 @@ void PlacementLogPrinter::print_initial_placement_stats() const {
 }
 
 void PlacementLogPrinter::print_post_placement_stats() const {
+    if (quiet_) {
+        return;
+    }
+
     const auto& timing_ctx = g_vpr_ctx.timing();
     const PlacementAnnealer& annealer = placer_.annealer();
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index 41511ae1dd4..d01d3f4b6dc 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -19,7 +19,7 @@ class Placer;
 
 class PlacementLogPrinter {
   public:
-    explicit PlacementLogPrinter(const Placer& placer);
+    explicit PlacementLogPrinter(const Placer& placer, bool quiet);
 
     void print_place_status_header() const;
     void print_resources_utilization() const;
@@ -30,6 +30,7 @@ class PlacementLogPrinter {
 
   private:
     const Placer& placer_;
+    const bool quiet_;
     mutable std::vector<char> msg_;
 };
 
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index ec2cc7a9551..b0a1bc45652 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -31,7 +31,7 @@ Placer::Placer(const Netlist<>& net_list,
     , rng_(placer_opts.seed)
     , net_cost_handler_(placer_opts, placer_state_, cube_bb)
     , place_delay_model_(std::move(place_delay_model))
-    , log_printer_(*this) {
+    , log_printer_(*this, /*quiet*/false) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();

From 291ec6fb59a024c1b4e8a17b6914bc1b60cc74b2 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 16:46:41 -0500
Subject: [PATCH 16/32] record timing_stats in Placer class

---
 vpr/src/place/place.cpp          |  2 --
 vpr/src/place/place_log_util.cpp | 22 ++++++++++++++++++++++
 vpr/src/place/placer.cpp         | 32 +++++++-------------------------
 vpr/src/place/placer.h           |  4 ++++
 4 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index ba86e91020b..fb58339355b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -61,8 +61,6 @@ void try_place(const Netlist<>& net_list,
      */
     VTR_ASSERT(!is_flat);
     const auto& device_ctx = g_vpr_ctx.device();
-//    const auto& timing_ctx = g_vpr_ctx.timing();
-//    auto pre_place_timing_stats = timing_ctx.stats;
 
     /* Placement delay model is independent of the placement and can be shared across
      * multiple placers. So, it is created and initialized once. */
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 37e16fd4cd6..29ba2b917a7 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -1,3 +1,4 @@
+
 #include "place_log_util.h"
 
 #include "vtr_log.h"
@@ -277,6 +278,27 @@ void PlacementLogPrinter::print_post_placement_stats() const {
         }
 #endif //ENABLE_NOC_SAT_ROUTING
     }
+
+    // Print out swap statistics and resource utilization
+    print_resources_utilization();
+    print_placement_swaps_stats();
+
+    move_type_stats.print_placement_move_types_stats();
+
+    if (placer_.noc_opts_.noc) {
+        write_noc_placement_file(placer_.noc_opts_.noc_placement_file_name,
+                                 placer_.placer_state_.block_locs());
+    }
+
+    print_timing_stats("Placement Quench", placer_.post_quench_timing_stats_, placer_.pre_quench_timing_stats_);
+    print_timing_stats("Placement Total ", timing_ctx.stats, placer_.pre_place_timing_stats_);
+
+    const auto& p_runtime_ctx = placer_.placer_state_.runtime();
+    VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
+            p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_nets_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec,
+            p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
 }
 
 void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index b0a1bc45652..d2386e86128 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -36,6 +36,9 @@ Placer::Placer(const Netlist<>& net_list,
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();
 
+    const auto& timing_ctx = g_vpr_ctx.timing();
+    pre_place_timing_stats_ = timing_ctx.stats;
+
     init_placement_context(placer_state_.mutable_blk_loc_registry(), directs);
 
     // create a NoC cost handler if NoC optimization is enabled
@@ -255,7 +258,7 @@ int Placer::check_placement_costs_() {
 void Placer::place() {
    const auto& timing_ctx = g_vpr_ctx.timing();
    const auto& cluster_ctx = g_vpr_ctx.clustering();
-   const auto& p_runtime_ctx = placer_state_.runtime();
+
 
    bool skip_anneal = false;
 #ifdef ENABLE_ANALYTIC_PLACE
@@ -265,9 +268,6 @@ void Placer::place() {
    }
 #endif
 
-   const t_annealing_state& annealing_state = annealer_->get_annealing_state();
-   const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats();
-
    if (!skip_anneal) {
        //Table header
        log_printer_.print_place_status_header();
@@ -307,7 +307,7 @@ void Placer::place() {
     // Start Quench
     annealer_->start_quench();
 
-    auto pre_quench_timing_stats = timing_ctx.stats;
+    pre_quench_timing_stats_ = timing_ctx.stats;
     { // Quench
        vtr::ScopedFinishTimer temperature_timer("Placement Quench");
 
@@ -323,9 +323,10 @@ void Placer::place() {
 
        log_printer_.print_place_status(temperature_timer.elapsed_sec());
     }
-    auto post_quench_timing_stats = timing_ctx.stats;
+    post_quench_timing_stats_ = timing_ctx.stats;
 
     // Final timing analysis
+    const t_annealing_state& annealing_state = annealer_->get_annealing_state();
     PlaceCritParams crit_params;
     crit_params.crit_exponent = annealing_state.crit_exponent;
     crit_params.crit_limit = placer_opts_.place_crit_limit;
@@ -370,25 +371,6 @@ void Placer::place() {
     check_place_();
 
     log_printer_.print_post_placement_stats();
-
-    // Print out swap statistics and resource utilization
-    log_printer_.print_resources_utilization();
-    log_printer_.print_placement_swaps_stats();
-
-    move_type_stats.print_placement_move_types_stats();
-
-    if (noc_opts_.noc) {
-       write_noc_placement_file(noc_opts_.noc_placement_file_name, placer_state_.block_locs());
-    }
-
-    print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats);
-//    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
-
-    VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
-            p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_nets_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec,
-            p_runtime_ctx.f_update_td_costs_total_elapsed_sec);
 }
 
 void Placer::copy_locs_to_global_state() {
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index ea6643bb7f5..29d4b4bdd2d 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -86,6 +86,10 @@ class Placer {
 
     const PlacementLogPrinter log_printer_;
 
+    t_timing_analysis_profile_info pre_place_timing_stats_;
+    t_timing_analysis_profile_info pre_quench_timing_stats_;
+    t_timing_analysis_profile_info post_quench_timing_stats_;
+
     friend void PlacementLogPrinter::print_post_placement_stats() const;
 
   private:

From d923d7518d3fd863a38e7a8c3f20db9e09261f4a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 20 Nov 2024 11:59:09 -0500
Subject: [PATCH 17/32] add is_flat to Placer

---
 vpr/src/place/place.cpp  |  2 +-
 vpr/src/place/placer.cpp | 12 +++++++-----
 vpr/src/place/placer.h   | 10 ++++++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index e687f3d41b3..f0d0ab034c6 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -93,7 +93,7 @@ void try_place(const Netlist<>& net_list,
     place_ctx.lock_loc_vars();
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 
-    Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb);
+    Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false);
 
 #ifndef NO_GRAPHICS
     if (placer.noc_cost_handler().has_value()) {
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index d2386e86128..e2a1af629de 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -22,7 +22,9 @@ Placer::Placer(const Netlist<>& net_list,
                const t_noc_opts& noc_opts,
                const std::vector<t_direct_inf>& directs,
                std::shared_ptr<PlaceDelayModel> place_delay_model,
-               bool cube_bb)
+               bool cube_bb,
+               bool is_flat,
+               bool quiet)
     : placer_opts_(placer_opts)
     , analysis_opts_(analysis_opts)
     , noc_opts_(noc_opts)
@@ -31,7 +33,8 @@ Placer::Placer(const Netlist<>& net_list,
     , rng_(placer_opts.seed)
     , net_cost_handler_(placer_opts, placer_state_, cube_bb)
     , place_delay_model_(std::move(place_delay_model))
-    , log_printer_(*this, /*quiet*/false) {
+    , log_printer_(*this, quiet)
+    , is_flat_(is_flat) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();
@@ -61,7 +64,6 @@ Placer::Placer(const Netlist<>& net_list,
         normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
     }
 
-
     BlkLocRegistry& blk_loc_registry = placer_state_.mutable_blk_loc_registry();
     initial_placement(placer_opts, placer_opts.constraints_file.c_str(),
                       noc_opts, blk_loc_registry, noc_cost_handler_, rng_);
@@ -151,7 +153,7 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
    placement_delay_calc_ = std::make_shared<PlacementDelayCalculator>(atom_ctx.nlist,
                                                                       atom_ctx.lookup,
                                                                       p_timing_ctx.connection_delay,
-                                                                      /*is_flat=*/false);
+                                                                      is_flat_);
    placement_delay_calc_->set_tsu_margin_relative(placer_opts_.tsu_rel_margin);
    placement_delay_calc_->set_tsu_margin_absolute(placer_opts_.tsu_abs_margin);
 
@@ -167,7 +169,7 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
                                                              atom_ctx.nlist,
                                                              atom_ctx.lookup,
                                                              *timing_info_->timing_graph(),
-                                                             /*is_flat=*/false);
+                                                             is_flat_);
 
    // First time compute timing and costs, compute from scratch
    PlaceCritParams crit_params;
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 29d4b4bdd2d..d17f61b0650 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -27,7 +27,9 @@ class Placer {
            const t_noc_opts& noc_opts,
            const std::vector<t_direct_inf>& directs,
            std::shared_ptr<PlaceDelayModel> place_delay_model,
-           bool cube_bb);
+           bool cube_bb,
+           bool is_flat,
+           bool quiet);
 
     void place();
 
@@ -55,7 +57,6 @@ class Placer {
 
     const std::optional<NocCostHandler>& noc_cost_handler() const;
 
-    //TODO: make this private
   private:
     const t_placer_opts& placer_opts_;
     const t_analysis_opts& analysis_opts_;
@@ -66,6 +67,8 @@ class Placer {
     NetCostHandler net_cost_handler_;
     std::optional<NocCostHandler> noc_cost_handler_;
     std::shared_ptr<PlaceDelayModel> place_delay_model_;
+    const PlacementLogPrinter log_printer_;
+    const bool is_flat_;
 
     t_placement_checkpoint placement_checkpoint_;
 
@@ -76,7 +79,6 @@ class Placer {
     std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator_;
     tatum::TimingPathInfo critical_path_;
 
-
     std::unique_ptr<vtr::ScopedStartFinishTimer> timer_;
 
     IntraLbPbPinLookup pb_gpin_lookup_;
@@ -84,7 +86,7 @@ class Placer {
 
     std::unique_ptr<PlacementAnnealer> annealer_;
 
-    const PlacementLogPrinter log_printer_;
+
 
     t_timing_analysis_profile_info pre_place_timing_stats_;
     t_timing_analysis_profile_info pre_quench_timing_stats_;

From c61625464ebe6b68a6aa348f818ff5a1bb8a16f8 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 20 Nov 2024 13:05:27 -0500
Subject: [PATCH 18/32] fix failure in vtr_reg_strong/strong_graphics_commands

---
 vpr/src/base/read_options.cpp |  2 +-
 vpr/src/place/place.cpp       |  9 ---------
 vpr/src/place/placer.cpp      | 11 +++++++++++
 vpr/src/place/placer.h        |  2 --
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index f789f848808..78124dd85c3 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1377,7 +1377,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             "      * set_nets <int>\n"
             "           Sets the net drawing state\n"
             "      * set_cpd <int>\n"
-            "           Sets the criticla path delay drawing state\n"
+            "           Sets the critical path delay drawing state\n"
             "      * set_routing_util <int>\n"
             "           Sets the routing utilization drawing state\n"
             "      * set_clip_routing_util <int>\n"
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f0d0ab034c6..496a2a1dfde 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -95,15 +95,6 @@ void try_place(const Netlist<>& net_list,
 
     Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false);
 
-#ifndef NO_GRAPHICS
-    if (placer.noc_cost_handler().has_value()) {
-        get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler()->get_link_bandwidth_usages());
-    }
-#endif
-
-    const int width_fac = placer_opts.place_chan_width;
-    init_draw_coords((float)width_fac, placer.placer_state().blk_loc_registry());
-
     placer.place();
 
     free_placement_structs();
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index e2a1af629de..2de33c88791 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -94,6 +94,17 @@ Placer::Placer(const Netlist<>& net_list,
        blk_loc_registry.place_sync_external_block_connections(block_id);
    }
 
+   if (!quiet) {
+#ifndef NO_GRAPHICS
+       if (noc_cost_handler_.has_value()) {
+           get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler_->get_link_bandwidth_usages());
+       }
+#endif
+
+       const int width_fac = placer_opts.place_chan_width;
+       init_draw_coords((float)width_fac, placer_state_.blk_loc_registry());
+   }
+
    // Allocate here because it goes into timing critical code where each memory allocation is expensive
    pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types);
    // Enables fast look-up of atom pins connect to CLB pins
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index d17f61b0650..f65ed38b56e 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -86,8 +86,6 @@ class Placer {
 
     std::unique_ptr<PlacementAnnealer> annealer_;
 
-
-
     t_timing_analysis_profile_info pre_place_timing_stats_;
     t_timing_analysis_profile_info pre_quench_timing_stats_;
     t_timing_analysis_profile_info post_quench_timing_stats_;

From 1dbb81ba0cec65d9e0fb3ed23a39720e4ae45f65 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 20 Nov 2024 13:32:35 -0500
Subject: [PATCH 19/32] add some comments to Placer

---
 vpr/src/place/placer.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index f65ed38b56e..e9be736d4c7 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -58,18 +58,30 @@ class Placer {
     const std::optional<NocCostHandler>& noc_cost_handler() const;
 
   private:
+    /// Holds placement algorithm parameters
     const t_placer_opts& placer_opts_;
+    /// Holds timing analysis parameters
     const t_analysis_opts& analysis_opts_;
+    /// Holds NoC-related parameters
     const t_noc_opts& noc_opts_;
+    /// Placement cost terms with their normalization factors and total cost
     t_placer_costs costs_;
+    /// Holds timing, runtime, and block location information
     PlacerState placer_state_;
+    /// Random number generator used to select random blocks and locations
     vtr::RngContainer rng_;
+    /// Computes and updates net bounding box cost
     NetCostHandler net_cost_handler_;
+    /// Computes and updates NoC-related cost terms if NoC optimization is enabled
     std::optional<NocCostHandler> noc_cost_handler_;
+    /// A delay model shared between multiple instances of this class.
     std::shared_ptr<PlaceDelayModel> place_delay_model_;
+    /// Prints logs during placement
     const PlacementLogPrinter log_printer_;
+    /// Indicates whether the flat routing resource graph and delay model are used. It should be false.
     const bool is_flat_;
 
+    /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later.
     t_placement_checkpoint placement_checkpoint_;
 
     std::shared_ptr<SetupTimingInfo> timing_info_;
@@ -105,6 +117,12 @@ class Placer {
      */
     void check_place_();
 
+    /**
+     * Computes bounding box and timing cost to ensure it is
+     * within a small error margin of what we think the cost is.
+     * @return Number of cost elements, i.e. BB and timing, that fall
+     * outside the acceptable round-off error margin.
+     */
     int check_placement_costs_();
 };
 

From 427f9f268eb52dc3476918a5852cd747a05c2763 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 20 Nov 2024 17:33:55 -0500
Subject: [PATCH 20/32] string_view and reference in timing_util

---
 vpr/src/place/place.cpp        |  1 -
 vpr/src/timing/timing_util.cpp | 75 ++++++++++++++++++----------------
 vpr/src/timing/timing_util.h   | 22 ++++++----
 3 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 496a2a1dfde..c0257d939c0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -9,7 +9,6 @@
 #include "globals.h"
 #include "place.h"
 #include "annealer.h"
-#include "draw.h"
 #include "read_xml_arch_file.h"
 #include "echo_files.h"
 #include "histogram.h"
diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp
index a0de3038fde..e51fadf5d42 100644
--- a/vpr/src/timing/timing_util.cpp
+++ b/vpr/src/timing/timing_util.cpp
@@ -1,5 +1,6 @@
 #include <fstream>
 #include <sstream>
+#include <utility>
 
 #include "vtr_log.h"
 #include "vtr_assert.h"
@@ -30,7 +31,7 @@ tatum::TimingPathInfo find_longest_critical_path_delay(const tatum::TimingConstr
 
     auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer);
 
-    //Record the maximum critical path accross all domain pairs
+    //Record the maximum critical path across all domain pairs
     for (const auto& path_info : cpds) {
         if (crit_path_info.delay() < path_info.delay() || std::isnan(crit_path_info.delay())) {
             crit_path_info = path_info;
@@ -234,7 +235,7 @@ TimingStats::TimingStats(std::string pref, double cpd, double f_max, double swns
     fmax = f_max;
     setup_worst_neg_slack = swns;
     setup_total_neg_slack = stns;
-    prefix = pref;
+    prefix = std::move(pref);
 }
 
 void TimingStats::write(OutputFormat fmt, std::ostream& output) const {
@@ -255,23 +256,23 @@ void TimingStats::write(OutputFormat fmt, std::ostream& output) const {
     }
 }
 
-void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats) {
-    if (timing_summary_filename.size() > 0) {
+void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats) {
+    if (!timing_summary_filename.empty()) {
         TimingStats::OutputFormat fmt;
 
-        if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".json")) {
+        if (vtr::check_file_name_extension(timing_summary_filename.data(), ".json")) {
             fmt = TimingStats::OutputFormat::JSON;
-        } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".xml")) {
+        } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".xml")) {
             fmt = TimingStats::OutputFormat::XML;
-        } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".txt")) {
+        } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".txt")) {
             fmt = TimingStats::OutputFormat::HumanReadable;
         } else {
-            VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.c_str());
+            VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.data());
         }
 
         std::fstream fp;
 
-        fp.open(timing_summary_filename, std::fstream::out | std::fstream::trunc);
+        fp.open(timing_summary_filename.data(), std::fstream::out | std::fstream::trunc);
         stats.write(fmt, fp);
         fp.close();
     }
@@ -279,8 +280,8 @@ void write_setup_timing_summary(std::string timing_summary_filename, const Timin
 
 void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
                                 const tatum::SetupTimingAnalyzer& setup_analyzer,
-                                std::string prefix,
-                                std::string timing_summary_filename) {
+                                std::string_view prefix,
+                                std::string_view timing_summary_filename) {
     auto& timing_ctx = g_vpr_ctx.timing();
 
     auto crit_paths = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer);
@@ -292,12 +293,12 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
     double setup_worst_neg_slack = sec_to_nanosec(find_setup_worst_negative_slack(setup_analyzer));
     double setup_total_neg_slack = sec_to_nanosec(find_setup_total_negative_slack(setup_analyzer));
 
-    const auto stats = TimingStats(prefix, least_slack_cpd_delay, fmax,
+    const auto stats = TimingStats(prefix.data(), least_slack_cpd_delay, fmax,
                                    setup_worst_neg_slack, setup_total_neg_slack);
     if (!timing_summary_filename.empty())
         write_setup_timing_summary(timing_summary_filename, stats);
 
-    VTR_LOG("%scritical path delay (least slack): %g ns", prefix.c_str(), least_slack_cpd_delay);
+    VTR_LOG("%scritical path delay (least slack): %g ns", prefix.data(), least_slack_cpd_delay);
 
     if (crit_paths.size() == 1) {
         //Fmax is only meaningful for a single-clock circuit
@@ -305,11 +306,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
     }
     VTR_LOG("\n");
 
-    VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", prefix.c_str(), setup_worst_neg_slack);
-    VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.c_str(), setup_total_neg_slack);
+    VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", prefix.data(), setup_worst_neg_slack);
+    VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.data(), setup_total_neg_slack);
     VTR_LOG("\n");
 
-    VTR_LOG("%ssetup slack histogram:\n", prefix.c_str());
+    VTR_LOG("%ssetup slack histogram:\n", prefix.data());
     print_histogram(create_setup_slack_histogram(setup_analyzer));
 
     if (crit_paths.size() > 1) {
@@ -317,7 +318,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
         VTR_LOG("\n");
 
         //Periods per constraint
-        VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.c_str());
+        VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.data());
         for (const auto& path : crit_paths) {
             if (path.launch_domain() == path.capture_domain()) {
                 VTR_LOG("  %s to %s CPD: %g ns (%g MHz)\n",
@@ -329,7 +330,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
         }
         VTR_LOG("\n");
 
-        VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.c_str());
+        VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.data());
         for (const auto& path : crit_paths) {
             if (path.launch_domain() != path.capture_domain()) {
                 VTR_LOG("  %s to %s CPD: %g ns (%g MHz)\n",
@@ -342,7 +343,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
         VTR_LOG("\n");
 
         //Slack per constraint
-        VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.c_str());
+        VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.data());
         for (const auto& path : crit_paths) {
             if (path.launch_domain() == path.capture_domain()) {
                 VTR_LOG("  %s to %s worst setup slack: %g ns\n",
@@ -353,7 +354,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
         }
         VTR_LOG("\n");
 
-        VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", prefix.c_str());
+        VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", prefix.data());
         for (const auto& path : crit_paths) {
             if (path.launch_domain() != path.capture_domain()) {
                 VTR_LOG("  %s to %s worst setup slack: %g ns\n",
@@ -374,7 +375,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
         if (path.launch_domain() == path.capture_domain() && !constraints.is_virtual_clock(path.launch_domain())) {
             if (path.delay() == 0.) {
                 VTR_LOG_WARN("%s%s to %s CPD is %g, skipping in geomean and fanout-weighted CPDs\n",
-                             prefix.c_str(),
+                             prefix.data(),
                              constraints.clock_domain_name(path.launch_domain()).c_str(),
                              constraints.clock_domain_name(path.capture_domain()).c_str(),
                              sec_to_nanosec(path.delay()));
@@ -394,11 +395,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
 
     //Print multi-clock geomeans
     double geomean_intra_domain_cpd = std::numeric_limits<double>::quiet_NaN();
-    if (intra_domain_cpds.size() > 0) {
+    if (!intra_domain_cpds.empty()) {
         geomean_intra_domain_cpd = vtr::geomean(intra_domain_cpds.begin(), intra_domain_cpds.end());
     }
     VTR_LOG("%sgeomean non-virtual intra-domain period: %g ns (%g MHz)\n",
-            prefix.c_str(),
+            prefix.data(),
             sec_to_nanosec(geomean_intra_domain_cpd),
             sec_to_mhz(geomean_intra_domain_cpd));
 
@@ -408,13 +409,13 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
     }
 
     double fanout_weighted_geomean_intra_domain_cpd = std::numeric_limits<double>::quiet_NaN();
-    if (fanout_weighted_intra_domain_cpds.size() > 0) {
+    if (!fanout_weighted_intra_domain_cpds.empty()) {
         fanout_weighted_geomean_intra_domain_cpd = vtr::geomean(fanout_weighted_intra_domain_cpds.begin(),
                                                                 fanout_weighted_intra_domain_cpds.end());
     }
 
     VTR_LOG("%sfanout-weighted geomean non-virtual intra-domain period: %g ns (%g MHz)\n",
-            prefix.c_str(),
+            prefix.data(),
             sec_to_nanosec(fanout_weighted_geomean_intra_domain_cpd),
             sec_to_mhz(fanout_weighted_geomean_intra_domain_cpd));
 
@@ -605,20 +606,22 @@ std::vector<HistogramBucket> create_hold_slack_histogram(const tatum::HoldTiming
     return histogram;
 }
 
-void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix) {
+void print_hold_timing_summary(const tatum::TimingConstraints& constraints,
+                               const tatum::HoldTimingAnalyzer& hold_analyzer,
+                               std::string_view prefix) {
     auto& timing_ctx = g_vpr_ctx.timing();
 
     auto hold_worst_neg_slack = sec_to_nanosec(find_hold_worst_negative_slack(hold_analyzer));
     auto hold_total_neg_slack = sec_to_nanosec(find_hold_total_negative_slack(hold_analyzer));
 
-    VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.c_str(), hold_worst_neg_slack);
-    VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.c_str(), hold_total_neg_slack);
+    VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.data(), hold_worst_neg_slack);
+    VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.data(), hold_total_neg_slack);
 
     /*For testing*/
     //VTR_LOG("Hold Total Negative Slack within clbs: %g ns\n", sec_to_nanosec(find_total_negative_slack_within_clb_blocks(hold_analyzer)));
     VTR_LOG("\n");
 
-    VTR_LOG("%shold slack histogram:\n", prefix.c_str());
+    VTR_LOG("%shold slack histogram:\n", prefix.data());
     print_histogram(create_hold_slack_histogram(hold_analyzer));
 
     if (constraints.clock_domains().size() > 1) {
@@ -626,7 +629,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons
         VTR_LOG("\n");
 
         //Slack per constraint
-        VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.c_str());
+        VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.data());
         for (const auto& domain : constraints.clock_domains()) {
             float worst_slack = find_hold_worst_slack(hold_analyzer, domain, domain);
 
@@ -639,7 +642,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons
         }
         VTR_LOG("\n");
 
-        VTR_LOG("%sinter-domain worst hold slacks per constraint:\n", prefix.c_str());
+        VTR_LOG("%sinter-domain worst hold slacks per constraint:\n", prefix.data());
         for (const auto& launch_domain : constraints.clock_domains()) {
             for (const auto& capture_domain : constraints.clock_domains()) {
                 if (launch_domain != capture_domain) {
@@ -816,13 +819,13 @@ float calc_relaxed_criticality(const std::map<DomainPair, float>& domains_max_re
     return max_crit;
 }
 
-void print_tatum_cpds(std::vector<tatum::TimingPathInfo> cpds) {
+void print_tatum_cpds(const std::vector<tatum::TimingPathInfo>& cpds) {
     for (auto path : cpds) {
         VTR_LOG("Tatum   %zu -> %zu: least_slack=%g cpd=%g\n", size_t(path.launch_domain()), size_t(path.capture_domain()), float(path.slack()), float(path.delay()));
     }
 }
 
-tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) {
+tatum::NodeId id_or_pin_name_to_tnode(const std::string& pin_name_or_tnode) {
     std::istringstream ss(pin_name_or_tnode);
     int id;
     if (ss >> id) { //Successfully converted
@@ -837,7 +840,7 @@ tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) {
     return pin_name_to_tnode(pin_name_or_tnode);
 }
 
-tatum::NodeId pin_name_to_tnode(std::string pin_name) {
+tatum::NodeId pin_name_to_tnode(const std::string& pin_name) {
     auto& atom_ctx = g_vpr_ctx.atom();
 
     AtomPinId pin = atom_ctx.nlist.find_pin(pin_name);
@@ -855,7 +858,7 @@ tatum::NodeId pin_name_to_tnode(std::string pin_name) {
     return tnode;
 }
 
-void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) {
+void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) {
     auto& timing_graph = *timing_info.timing_graph();
 
     auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator());
@@ -874,7 +877,7 @@ void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& t
     dot_writer.write_dot_file(filename, *timing_info.setup_analyzer());
 }
 
-void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) {
+void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) {
     auto& timing_graph = *timing_info.timing_graph();
 
     auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator());
diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h
index 0a04a132f44..e0d011214ba 100644
--- a/vpr/src/timing/timing_util.h
+++ b/vpr/src/timing/timing_util.h
@@ -1,6 +1,7 @@
 #ifndef VPR_TIMING_UTIL_H
 #define VPR_TIMING_UTIL_H
 #include <vector>
+#include <string_view>
 
 #include "netlist_fwd.h"
 #include "tatum/timing_analyzers.hpp"
@@ -49,7 +50,10 @@ std::vector<HistogramBucket> create_criticality_histogram(const Netlist<>& net_l
                                                           size_t num_bins = 10);
 
 //Print a useful summary of timing information
-void print_setup_timing_summary(const tatum::TimingConstraints& constraints, const tatum::SetupTimingAnalyzer& setup_analyzer, std::string prefix, std::string timing_summary_filename);
+void print_setup_timing_summary(const tatum::TimingConstraints& constraints,
+                                const tatum::SetupTimingAnalyzer& setup_analyzer,
+                                std::string_view prefix,
+                                std::string_view timing_summary_filename);
 
 /*
  * Hold-time related statistics
@@ -67,7 +71,9 @@ float find_hold_worst_slack(const tatum::HoldTimingAnalyzer& hold_analyzer, cons
 std::vector<HistogramBucket> create_hold_slack_histogram(const tatum::HoldTimingAnalyzer& hold_analyzer, size_t num_bins = 10);
 
 //Print a useful summary of timing information
-void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix);
+void print_hold_timing_summary(const tatum::TimingConstraints& constraints,
+                               const tatum::HoldTimingAnalyzer& hold_analyzer,
+                               std::string_view prefix);
 
 float find_total_negative_slack_within_clb_blocks(const tatum::HoldTimingAnalyzer& hold_analyzer);
 
@@ -114,13 +120,13 @@ float calc_relaxed_criticality(const std::map<DomainPair, float>& domains_max_re
 /*
  * Debug
  */
-void print_tatum_cpds(std::vector<tatum::TimingPathInfo> cpds);
+void print_tatum_cpds(const std::vector<tatum::TimingPathInfo>& cpds);
 
-tatum::NodeId id_or_pin_name_to_tnode(std::string name_or_id);
-tatum::NodeId pin_name_to_tnode(std::string name);
+tatum::NodeId id_or_pin_name_to_tnode(const std::string& name_or_id);
+tatum::NodeId pin_name_to_tnode(const std::string& name);
 
-void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
-void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
+void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
+void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID());
 
 struct TimingStats {
   private:
@@ -147,6 +153,6 @@ struct TimingStats {
 };
 
 //Write a useful summary of timing information to JSON file
-void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats);
+void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats);
 
 #endif

From e70f43c7ef257bbbbd8a53a5e67dbb706cce769d Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 24 Nov 2024 14:05:08 -0500
Subject: [PATCH 21/32] add PlacementContext& arg to copy_locs_to_global_state
 && use pragma once in header files

---
 vpr/src/place/place.cpp        |  2 +-
 vpr/src/place/place_log_util.h |  5 +----
 vpr/src/place/placer.cpp       |  4 +---
 vpr/src/place/placer.h         | 10 ++++------
 4 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index c0257d939c0..d1f43af4e05 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -98,7 +98,7 @@ void try_place(const Netlist<>& net_list,
 
     free_placement_structs();
 
-    placer.copy_locs_to_global_state();
+    placer.copy_locs_to_global_state(place_ctx);
 }
 
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index d01d3f4b6dc..c83b3a0f36d 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -1,6 +1,5 @@
 
-#ifndef VTR_PLACE_LOG_UTIL_H
-#define VTR_PLACE_LOG_UTIL_H
+#pragma once
 
 #include <cstddef>
 #include <vector>
@@ -40,5 +39,3 @@ void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                         const PlacementDelayCalculator& delay_calc,
                                         bool is_flat,
                                         const BlkLocRegistry& blk_loc_registry);
-
-#endif //VTR_PLACE_LOG_UTIL_H
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 2de33c88791..26f7ca2f756 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -386,9 +386,7 @@ void Placer::place() {
     log_printer_.print_post_placement_stats();
 }
 
-void Placer::copy_locs_to_global_state() {
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
-
+void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) {
     // the placement location variables should be unlocked before being accessed
     place_ctx.unlock_loc_vars();
 
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index e9be736d4c7..a01791d87a8 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -1,7 +1,5 @@
 
-
-#ifndef VTR_PLACER_H
-#define VTR_PLACER_H
+#pragma once
 
 #include <memory>
 #include <optional>
@@ -34,9 +32,10 @@ class Placer {
     void place();
 
     /**
-     * @brief Copies the placement location variables into the global placement context.
+     * @brief Copies the placement location variables into the given global placement context.
+     * @param place_ctx The placement context to which location information will be copied.
      */
-    void copy_locs_to_global_state();
+    void copy_locs_to_global_state(PlacementContext& place_ctx);
 
     /*
      * Getters
@@ -126,4 +125,3 @@ class Placer {
     int check_placement_costs_();
 };
 
-#endif //VTR_PLACER_H

From 39ad04ebefda8df739fcd3202fafddb3ec1ede7b Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 24 Nov 2024 14:25:38 -0500
Subject: [PATCH 22/32] delete free_placement_structs()

---
 vpr/src/place/place.cpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index d1f43af4e05..f00a2100a76 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -36,8 +36,6 @@ void print_clb_placement(const char* fname);
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
                        const RRGraphView& rr_graph);
 
-static void free_placement_structs();
-
 /*****************************************************************************/
 void try_place(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
@@ -96,8 +94,12 @@ void try_place(const Netlist<>& net_list,
 
     placer.place();
 
-    free_placement_structs();
+    vtr::release_memory(place_ctx.compressed_block_grids);
 
+    /* The placer object has its own copy of block locations and doesn't update
+     * the global context directly. We need to copy its internal data structures
+     * to the global placement context before it goes out of scope.
+     */
     placer.copy_locs_to_global_state(place_ctx);
 }
 
@@ -126,13 +128,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     return cube_bb;
 }
 
-/* Frees the major structures needed by the placer (and not needed
- * elsewhere).   */
-static void free_placement_structs() {
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
-    vtr::release_memory(place_ctx.compressed_block_grids);
-}
-
 #ifdef VERBOSE
 void print_clb_placement(const char* fname) {
     /* Prints out the clb placements to a file.  */

From e93d89872351a8c7a56d0c2b185372a47629c26a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 24 Nov 2024 14:43:12 -0500
Subject: [PATCH 23/32] add file comments for placer.h and place_log_util.h

---
 vpr/src/place/place.h          |  6 ++----
 vpr/src/place/place_log_util.h | 17 +++++++++++++++++
 vpr/src/place/placer.h         | 16 ++++++++++++++++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h
index 210663823a8..e4a0172ba4e 100644
--- a/vpr/src/place/place.h
+++ b/vpr/src/place/place.h
@@ -1,5 +1,5 @@
-#ifndef VPR_PLACE_H
-#define VPR_PLACE_H
+
+#pragma once
 
 #include "vpr_types.h"
 
@@ -13,5 +13,3 @@ void try_place(const Netlist<>& net_list,
                std::vector<t_segment_inf>& segment_inf,
                const std::vector<t_direct_inf>& directs,
                bool is_flat);
-
-#endif
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h
index c83b3a0f36d..8c437a922fa 100644
--- a/vpr/src/place/place_log_util.h
+++ b/vpr/src/place/place_log_util.h
@@ -1,3 +1,20 @@
+/**
+ * @file placement_log_printer.h
+ * @brief Declares the PlacementLogPrinter class and associated utilities for logging
+ * and reporting placement-related statistics and timing analysis results.
+ *
+ * This file provides tools to monitor and report the progress and results of the placement stage.
+ *
+ * ### Key Components:
+ * - **PlacementLogPrinter**:
+ *   - A utility class for logging placement status, resource utilization, and swap statistics.
+ *   - Prints detailed statistics during the placement process, including initial and post-placement states.
+ *   - Supports a "quiet mode" to suppress output.
+ *
+ * ### Integration:
+ * The tools in this file integrate with the Placer class to provide information about
+ * the placement process for debugging, optimization, and analysis purposes.
+ */
 
 #pragma once
 
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index a01791d87a8..ede938482f7 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -1,3 +1,19 @@
+/**
+ * @file placer.h
+ * @brief Declares the Placer class, which encapsulates the functionality, data structures,
+ * and algorithms required for the placement stage.
+ *
+ * The Placer class initializes necessary objects, performs an initial placement,
+ * and runs simulated annealing optimization. This optimization minimizes
+ * wirelength (bounding box) and timing costs to achieve an efficient placement solution.
+ *
+ * Key features of the Placer class:
+ * - Encapsulates all placement-related variables, cost functions, and data structures.
+ * - Supports optional NoC (Network-on-Chip) cost optimizations if enabled.
+ * - Interfaces with timing analysis and placement delay calculation.
+ * - Provides a mechanism for checkpointing the placement state.
+ * - Includes debugging and validation utilities to verify the correctness of placement.
+ */
 
 #pragma once
 

From 877fd8eb2c02f050eabca707abcf9e1f7e418efa Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 24 Nov 2024 16:31:11 -0500
Subject: [PATCH 24/32] remove accessor methods from Placer

---
 vpr/src/place/place_log_util.cpp | 49 ++++++++++++++------------------
 vpr/src/place/placer.cpp         | 32 ---------------------
 vpr/src/place/placer.h           | 22 +-------------
 3 files changed, 23 insertions(+), 80 deletions(-)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index 29ba2b917a7..aa02ed96b40 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -23,7 +23,7 @@ void PlacementLogPrinter::print_place_status_header() const {
         return;
     }
 
-    const bool noc_enabled = placer_.noc_opts().noc;
+    const bool noc_enabled = placer_.noc_opts_.noc;
 
     VTR_LOG("\n");
     if (!noc_enabled) {
@@ -52,20 +52,20 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const {
         return;
     }
 
-    const PlacementAnnealer& annealer = placer_.annealer();
+    const PlacementAnnealer& annealer = *placer_.annealer_;
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
     const int tot_moves = annealer.get_total_iteration();
-    const t_placer_costs& costs = placer_.costs();
-    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info();
+    const t_placer_costs& costs = placer_.costs_;
+    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info_;
 
-    const bool noc_enabled = placer_.noc_opts().noc;
-    const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms;
+    const bool noc_enabled = placer_.noc_opts_.noc;
+    const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms;
 
-    const bool is_timing_driven = placer_.placer_opts().place_algorithm.is_timing_driven();
-    const float cpd = is_timing_driven ? placer_.critical_path().delay() : std::numeric_limits<float>::quiet_NaN();
-    const float sTNS = is_timing_driven ? placer_.timing_info()->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN();
-    const float sWNS = is_timing_driven ? placer_.timing_info()->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+    const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven();
+    const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits<float>::quiet_NaN();
+    const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN();
+    const float sWNS = is_timing_driven ? placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN();
 
     VTR_LOG(
         "%4zu %6.1f %7.1e "
@@ -105,7 +105,7 @@ void PlacementLogPrinter::print_resources_utilization() const {
 
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
-    const auto& block_locs = placer_.placer_state().block_locs();
+    const auto& block_locs = placer_.placer_state_.block_locs();
 
     size_t max_block_name = 0;
     size_t max_tile_name = 0;
@@ -144,7 +144,7 @@ void PlacementLogPrinter::print_placement_swaps_stats() const {
         return;
     }
 
-    const PlacementAnnealer& annealer = placer_.annealer();
+    const PlacementAnnealer& annealer = *placer_.annealer_;
     const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
 
@@ -170,25 +170,21 @@ void PlacementLogPrinter::print_initial_placement_stats() const {
         return;
     }
 
-    const t_placer_costs& costs = placer_.costs();
-    const t_noc_opts& noc_opts = placer_.noc_opts();
-    const t_placer_opts& placer_opts = placer_.placer_opts();
-    const tatum::TimingPathInfo& critical_path = placer_.critical_path();
-    const std::optional<NocCostHandler>& noc_cost_handler = placer_.noc_cost_handler();
-    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info();
-    const PlacerState& placer_state = placer_.placer_state();
+    const t_placer_costs& costs = placer_.costs_;
+    const t_placer_opts& placer_opts = placer_.placer_opts_;
+    std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info_;
 
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n",
         costs.cost, costs.bb_cost, costs.timing_cost);
 
-    if (noc_opts.noc) {
-        VTR_ASSERT(noc_cost_handler.has_value());
-        noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts);
+    if (placer_.noc_opts_.noc) {
+        VTR_ASSERT(placer_.noc_cost_handler_.has_value());
+        placer_.noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs, placer_.noc_opts_);
     }
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n",
-                1e9 * critical_path.delay());
+                1e9 * placer_.critical_path_.delay());
         VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n",
                 1e9 * timing_info->setup_total_negative_slack());
         VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n",
@@ -198,7 +194,7 @@ void PlacementLogPrinter::print_initial_placement_stats() const {
         print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer()));
     }
 
-    const BlkLocRegistry& blk_loc_registry = placer_state.blk_loc_registry();
+    const BlkLocRegistry& blk_loc_registry = placer_.placer_state_.blk_loc_registry();
     size_t num_macro_members = 0;
     for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) {
         num_macro_members += macro.members.size();
@@ -228,12 +224,11 @@ void PlacementLogPrinter::print_post_placement_stats() const {
     }
 
     const auto& timing_ctx = g_vpr_ctx.timing();
-    const PlacementAnnealer& annealer = placer_.annealer();
-    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
+    const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats();
 
     VTR_LOG("\n");
     VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
-    annealer.get_move_abortion_logger().report_aborted_moves();
+    placer_.annealer_->get_move_abortion_logger().report_aborted_moves();
 
     if (placer_.placer_opts_.place_algorithm.is_timing_driven()) {
         //Final timing estimate
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 26f7ca2f756..3a56a4e03e5 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -399,35 +399,3 @@ void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) {
     get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry);
 #endif
 }
-
-const PlacementAnnealer& Placer::annealer() const {
-    return *annealer_;
-}
-
-const t_placer_opts& Placer::placer_opts() const {
-    return placer_opts_;
-}
-
-const t_noc_opts& Placer::noc_opts() const {
-    return noc_opts_;
-}
-
-const t_placer_costs& Placer::costs() const {
-    return costs_;
-}
-
-const tatum::TimingPathInfo& Placer::critical_path() const {
-    return critical_path_;
-}
-
-std::shared_ptr<const SetupTimingInfo> Placer::timing_info() const {
-    return timing_info_;
-}
-
-const PlacerState& Placer::placer_state() const {
-    return placer_state_;
-}
-
-const std::optional<NocCostHandler>& Placer::noc_cost_handler() const {
-    return noc_cost_handler_;
-}
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index ede938482f7..a6e6f35cf46 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -53,25 +53,6 @@ class Placer {
      */
     void copy_locs_to_global_state(PlacementContext& place_ctx);
 
-    /*
-     * Getters
-     */
-    const PlacementAnnealer& annealer() const;
-
-    const t_placer_opts& placer_opts() const;
-
-    const t_noc_opts& noc_opts() const;
-
-    const t_placer_costs& costs() const;
-
-    const tatum::TimingPathInfo& critical_path() const;
-
-    std::shared_ptr<const SetupTimingInfo> timing_info() const;
-
-    const PlacerState& placer_state() const;
-
-    const std::optional<NocCostHandler>& noc_cost_handler() const;
-
   private:
     /// Holds placement algorithm parameters
     const t_placer_opts& placer_opts_;
@@ -117,7 +98,7 @@ class Placer {
     t_timing_analysis_profile_info pre_quench_timing_stats_;
     t_timing_analysis_profile_info post_quench_timing_stats_;
 
-    friend void PlacementLogPrinter::print_post_placement_stats() const;
+    friend class PlacementLogPrinter;
 
   private:
     void alloc_and_init_timing_objects_(const Netlist<>& net_list,
@@ -140,4 +121,3 @@ class Placer {
      */
     int check_placement_costs_();
 };
-

From b760d03da5a87de39b0098bc529fd5112f5a78f4 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 24 Nov 2024 16:38:07 -0500
Subject: [PATCH 25/32] quiet the placement timer if the placer object is quiet

---
 libs/libvtrutil/src/vtr_time.h | 2 +-
 vpr/src/place/place.cpp        | 5 +----
 vpr/src/place/placer.cpp       | 2 ++
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/libs/libvtrutil/src/vtr_time.h b/libs/libvtrutil/src/vtr_time.h
index 4e389ef5026..3f187e59288 100644
--- a/libs/libvtrutil/src/vtr_time.h
+++ b/libs/libvtrutil/src/vtr_time.h
@@ -36,7 +36,7 @@ class Timer {
     constexpr static float BYTE_TO_MIB = 1024 * 1024;
 };
 
-///@brief Scoped time class which prints the time elapsed for the specifid action
+///@brief Scoped time class which prints the time elapsed for the specified action
 class ScopedActionTimer : public Timer {
   public:
     ScopedActionTimer(std::string action);
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f00a2100a76..86d1f374212 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1,3 +1,4 @@
+
 #include <memory>
 
 #include "vtr_assert.h"
@@ -47,10 +48,6 @@ void try_place(const Netlist<>& net_list,
                std::vector<t_segment_inf>& segment_inf,
                const std::vector<t_direct_inf>& directs,
                bool is_flat) {
-    /* Does almost all the work of placing a circuit.  Width_fac gives the   *
-     * width of the widest channel.  Place_cost_exp says what exponent the   *
-     * width should be taken to when calculating costs.  This allows a       *
-     * greater bias for anisotropic architectures.                           */
 
     /* Currently, the functions that require is_flat as their parameter and are called during placement should
      * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 3a56a4e03e5..12dc5276646 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -51,6 +51,7 @@ Placer::Placer(const Netlist<>& net_list,
 
     // Start measuring placement time
     timer_ = std::make_unique<vtr::ScopedStartFinishTimer>("Placement");
+    timer_->quiet(quiet);
 
     /* To make sure the importance of NoC-related cost terms compared to
      * BB and timing cost is determine only through NoC placement weighting factor,
@@ -101,6 +102,7 @@ Placer::Placer(const Netlist<>& net_list,
        }
 #endif
 
+       // width_fac gives the width of the widest channel
        const int width_fac = placer_opts.place_chan_width;
        init_draw_coords((float)width_fac, placer_state_.blk_loc_registry());
    }

From aa53d975fb3be4f18de2c797aedbade434f96621 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 15:47:57 -0500
Subject: [PATCH 26/32] add some comments

---
 vpr/src/place/place.cpp          |  8 ++++++-
 vpr/src/place/place_log_util.cpp |  1 +
 vpr/src/place/placer.cpp         | 16 ++-----------
 vpr/src/place/placer.h           | 41 +++++++++++++++++++++++++++++++-
 vpr/src/timing/timing_info.h     |  2 +-
 5 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 86d1f374212..b090e46d0a5 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -57,7 +57,8 @@ void try_place(const Netlist<>& net_list,
     const auto& device_ctx = g_vpr_ctx.device();
 
     /* Placement delay model is independent of the placement and can be shared across
-     * multiple placers. So, it is created and initialized once. */
+     * multiple placers if we are performing parallel annealing.
+     * So, it is created and initialized once. */
     std::shared_ptr<PlaceDelayModel> place_delay_model;
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -84,6 +85,11 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("\n");
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
+
+    /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the
+     * placement context. This is done to make sure that the placement stage only accesses its
+     * own local instances of BlkLocRegistry.
+     */
     place_ctx.lock_loc_vars();
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp
index aa02ed96b40..d825a3af093 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/place_log_util.cpp
@@ -165,6 +165,7 @@ void PlacementLogPrinter::print_placement_swaps_stats() const {
     VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
             swap_stats.num_swap_aborted, 100 * abort_rate);
 }
+
 void PlacementLogPrinter::print_initial_placement_stats() const {
     if (quiet_) {
         return;
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 12dc5276646..3ad4d37ddd6 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -79,7 +79,7 @@ Placer::Placer(const Netlist<>& net_list,
 
 #ifdef ENABLE_ANALYTIC_PLACE
     /*
-     * Analytic Placer:
+     * Cluster-level Analytic Placer:
      *  Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on
      *  both the clb_netlist and the gird.
      *  Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench
@@ -284,7 +284,7 @@ void Placer::place() {
 #endif
 
    if (!skip_anneal) {
-       //Table header
+       // Table header
        log_printer_.print_place_status_header();
 
        // Outer loop of the simulated annealing begins
@@ -309,12 +309,6 @@ void Placer::place() {
 
            log_printer_.print_place_status(temperature_timer.elapsed_sec());
 
-           //#ifdef VERBOSE
-           //            if (getEchoEnabled()) {
-           //                print_clb_placement("first_iteration_clb_placement.echo");
-           //            }
-           //#endif
-
            // Outer loop of the simulated annealing ends
        } while (annealer_->outer_loop_update_state());
    } //skip_anneal ends
@@ -372,12 +366,6 @@ void Placer::place() {
        print_place(nullptr, nullptr, filename.c_str(), placer_state_.mutable_block_locs());
     }
 
-    //#ifdef VERBOSE
-    //    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) {
-    //        print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT));
-    //    }
-    //#endif
-
     // Update physical pin values
     for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
        placer_state_.mutable_blk_loc_registry().place_sync_external_block_connections(block_id);
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index a6e6f35cf46..412b0c040a5 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -79,12 +79,17 @@ class Placer {
 
     /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later.
     t_placement_checkpoint placement_checkpoint_;
-
+    /// It holds a setup timing analysis engine. Other placement timing objects usually have a reference or pointer to timing_info_.
     std::shared_ptr<SetupTimingInfo> timing_info_;
+    /// Post-clustering delay calculator. Its API allows extraction of delay for each timing edge.
     std::shared_ptr<PlacementDelayCalculator> placement_delay_calc_;
+    /// Stores setup slack of the clustered netlist connections.
     std::unique_ptr<PlacerSetupSlacks> placer_setup_slacks_;
+    /// Stores criticalities of the clustered netlist connections.
     std::unique_ptr<PlacerCriticalities> placer_criticalities_;
+    /// Used to invalidate timing edges corresponding to the pins of moved blocks.
     std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator_;
+    /// Stores information about the critical path. This is usually updated after timing info is updated.
     tatum::TimingPathInfo critical_path_;
 
     std::unique_ptr<vtr::ScopedStartFinishTimer> timer_;
@@ -92,15 +97,49 @@ class Placer {
     IntraLbPbPinLookup pb_gpin_lookup_;
     ClusteredPinAtomPinsLookup netlist_pin_lookup_;
 
+    /// Performs random swaps and implements the simulated annealer optimizer.
     std::unique_ptr<PlacementAnnealer> annealer_;
 
+    /* These variables store timing analysis profiling information
+     * at different stages of the placement to be printed at the end
+     */
     t_timing_analysis_profile_info pre_place_timing_stats_;
     t_timing_analysis_profile_info pre_quench_timing_stats_;
     t_timing_analysis_profile_info post_quench_timing_stats_;
 
+    /* PlacementLogPrinter is made a friend of this class, so it can
+     * access its private member variables without getter methods.
+     * PlacementLogPrinter holds a constant reference to an object of type
+     * Placer to avoid modifying its member variables.
+     */
     friend class PlacementLogPrinter;
 
   private:
+    /**
+     * @brief Constructs and initializes timing-related objects.
+     *
+     * This function performs the following steps to set up timing analysis:
+     *
+     * 1. Constructs a `tatum::DelayCalculator` for post-clustering delay calculations.
+     *    This calculator holds a reference to `PlacerTimingContext::connection_delay`,
+     *    which contains net delays based on block locations.
+     *
+     * 2. Creates and stores a `SetupTimingInfo` object in `timing_info_`.
+     *    This object utilizes the delay calculator to compute delays on timing edges
+     *    and calculate setup times.
+     *
+     * 3. Constructs `PlacerSetupSlacks` and `PlacerCriticalities` objects,
+     *    which translate arrival and required times into slacks and criticalities,
+     *    respectively.
+     *
+     * 4. Creates a `NetPinTimingInvalidator` object to mark timing edges
+     *    corresponding to the pins of moved blocks as invalid.
+     *
+     * 5. Performs a full timing analysis by marking all pins as invalid.
+     *
+     * @param net_list The netlist used for iterating over pins.
+     * @param analysis_opts Analysis options, including whether to echo the timing graph.
+     */
     void alloc_and_init_timing_objects_(const Netlist<>& net_list,
                                         const t_analysis_opts& analysis_opts);
 
diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h
index 14d3b08f939..836c95e50d7 100644
--- a/vpr/src/timing/timing_info.h
+++ b/vpr/src/timing/timing_info.h
@@ -62,7 +62,7 @@ class SetupTimingInfo : public virtual TimingInfo {
     //Return the critical path with the least slack
     virtual tatum::TimingPathInfo least_slack_critical_path() const = 0;
 
-    //Return the critical path the the longest absolute delay
+    //Return the critical path with the longest absolute delay
     virtual tatum::TimingPathInfo longest_critical_path() const = 0;
 
     //Return the set of critical paths between all clock domain pairs

From 726f3768482768f45eb26a7b32137d9983becb23 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 17:43:47 -0500
Subject: [PATCH 27/32] store pointers to TimingInfo in PlacerSetupSlacks and
 PlacerCriticalities and NetPinTimingInvalidator

---
 vpr/src/place/annealer.cpp               |  6 ++---
 vpr/src/place/place_timing_update.cpp    |  6 ++---
 vpr/src/place/placer.cpp                 | 10 +++++---
 vpr/src/place/placer.h                   |  5 ++--
 vpr/src/place/timing_place.cpp           | 31 ++++++++++++++----------
 vpr/src/place/timing_place.h             | 23 ++++++++++++------
 vpr/src/route/route.cpp                  |  2 +-
 vpr/src/route/route_net.h                |  2 +-
 vpr/src/timing/NetPinTimingInvalidator.h | 30 ++++++++++++-----------
 vpr/src/timing/timing_info.h             |  2 +-
 10 files changed, 68 insertions(+), 49 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 1d8836956ab..b18f60b27bd 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -471,7 +471,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
              */
 
             // Invalidates timing of modified connections for incremental timing updates.
-            pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
+            pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_);
 
             /* Update the connection_timing_cost and connection_delay
              * values from the temporary values. */
@@ -532,7 +532,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
                 /* Invalidates timing of modified connections for incremental
                  * timing updates. These invalidations are accumulated for a
                  * big timing update in the outer loop. */
-                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
+                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_);
 
                 /* Update the connection_timing_cost and connection_delay
                  * values from the temporary values. */
@@ -588,7 +588,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
                 /* Re-invalidate the affected sink pins since the proposed
                  * move is rejected, and the same blocks are reverted to
                  * their original positions. */
-                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
+                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_);
 
                 // Revert the timing update
                 update_timing_classes(crit_params, timing_info_, criticalities_,
diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp
index d558f386c4b..c9c53b88f90 100644
--- a/vpr/src/place/place_timing_update.cpp
+++ b/vpr/src/place/place_timing_update.cpp
@@ -45,7 +45,7 @@ void initialize_timing_info(const PlaceCritParams& crit_params,
     //by passing in all the clb sink pins
     for (ClusterNetId net_id : clb_nlist.nets()) {
         for (ClusterPinId pin_id : clb_nlist.net_sinks(net_id)) {
-            pin_timing_invalidator->invalidate_connection(pin_id, timing_info);
+            pin_timing_invalidator->invalidate_connection(pin_id);
         }
     }
 
@@ -142,10 +142,10 @@ void update_timing_classes(const PlaceCritParams& crit_params,
     timing_info->update();
 
     /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */
-    criticalities->update_criticalities(timing_info, crit_params, placer_state);
+    criticalities->update_criticalities(crit_params, placer_state);
 
     /* Update the placer's raw setup slacks. */
-    setup_slacks->update_setup_slacks(timing_info);
+    setup_slacks->update_setup_slacks();
 
     /* Clear invalidation state. */
     pin_timing_invalidator->reset();
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 3ad4d37ddd6..ab32b8a6890 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -172,16 +172,20 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
 
    timing_info_ = make_setup_timing_info(placement_delay_calc_, placer_opts_.timing_update_type);
 
-   placer_setup_slacks_ = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, netlist_pin_lookup_);
+   placer_setup_slacks_ = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist,
+                                                              netlist_pin_lookup_,
+                                                              timing_info_);
 
-   placer_criticalities_ = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist, netlist_pin_lookup_);
+   placer_criticalities_ = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist,
+                                                                 netlist_pin_lookup_,
+                                                                 timing_info_);
 
    pin_timing_invalidator_ = make_net_pin_timing_invalidator(placer_opts_.timing_update_type,
                                                              net_list,
                                                              netlist_pin_lookup_,
                                                              atom_ctx.nlist,
                                                              atom_ctx.lookup,
-                                                             *timing_info_->timing_graph(),
+                                                             timing_info_,
                                                              is_flat_);
 
    // First time compute timing and costs, compute from scratch
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 412b0c040a5..66692b8ca1d 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -130,10 +130,11 @@ class Placer {
      *
      * 3. Constructs `PlacerSetupSlacks` and `PlacerCriticalities` objects,
      *    which translate arrival and required times into slacks and criticalities,
-     *    respectively.
+     *    respectively. These objects hold pointers to timing_info_.
      *
      * 4. Creates a `NetPinTimingInvalidator` object to mark timing edges
-     *    corresponding to the pins of moved blocks as invalid.
+     *    corresponding to the pins of moved blocks as invalid. This object
+     *    holds a pointer to timing_info_.
      *
      * 5. Performs a full timing analysis by marking all pins as invalid.
      *
diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp
index 021bb6211fb..badd9d1fb61 100644
--- a/vpr/src/place/timing_place.cpp
+++ b/vpr/src/place/timing_place.cpp
@@ -16,9 +16,12 @@
 #include "timing_info.h"
 
 ///@brief Allocates space for the timing_place_crit_ data structure.
-PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup)
+PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist,
+                                         const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                         std::shared_ptr<const SetupTimingInfo> timing_info)
     : clb_nlist_(clb_nlist)
     , pin_lookup_(netlist_pin_lookup)
+    , timing_info_(std::move(timing_info))
     , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) {
 }
 
@@ -32,8 +35,7 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons
  *
  * If the criticality exponent has changed, we also need to update from scratch.
  */
-void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info,
-                                               const PlaceCritParams& crit_params,
+void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params,
                                                PlacerState& placer_state) {
     /* If update is not enabled, exit the routine. */
     if (!update_enabled) {
@@ -44,7 +46,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf
 
     /* Determine what pins need updating */
     if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) {
-        incr_update_criticalities(timing_info);
+        incr_update_criticalities();
     } else {
         recompute_criticalities();
 
@@ -63,7 +65,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf
         ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
         int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
         // Routing for placement is not flat (at least for the time being)
-        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false);
+        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false);
 
         float new_crit = pow(clb_pin_crit, crit_params.crit_exponent);
         /*
@@ -114,10 +116,10 @@ void PlacerCriticalities::set_recompute_required() {
  * atom pin criticalities.
  */
 
-void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) {
+void PlacerCriticalities::incr_update_criticalities() {
     cluster_pins_with_modified_criticality_.clear();
 
-    for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) {
+    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) {
         ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
 
         //Some atom pins correspond to connections which are completely
@@ -164,9 +166,12 @@ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticali
 /**************************************/
 
 ///@brief Allocates space for the timing_place_setup_slacks_ data structure.
-PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup)
+PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
+                                     const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                     std::shared_ptr<const SetupTimingInfo> timing_info)
     : clb_nlist_(clb_nlist)
     , pin_lookup_(netlist_pin_lookup)
+    , timing_info_(std::move(timing_info))
     , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) {
 }
 
@@ -180,7 +185,7 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const Cl
  * In this case, `recompute_required` would be true, and we update all setup slacks
  * from scratch.
  */
-void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) {
+void PlacerSetupSlacks::update_setup_slacks() {
     /* If update is not enabled, exit the routine. */
     if (!update_enabled) {
         /* re-computation is required on the next iteration */
@@ -190,7 +195,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info)
 
     /* Determine what pins need updating */
     if (!recompute_required) {
-        incr_update_setup_slacks(timing_info);
+        incr_update_setup_slacks();
     } else {
         recompute_setup_slacks();
     }
@@ -200,7 +205,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info)
         ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
         int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
 
-        float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info, pin_lookup_, clb_pin);
+        float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin);
 
         timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack;
     }
@@ -217,10 +222,10 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info)
  * Note we use the set of pins reported by the *timing_info* as having modified
  * setup slacks, rather than those marked as modified by the timing analyzer.
  */
-void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) {
+void PlacerSetupSlacks::incr_update_setup_slacks() {
     cluster_pins_with_modified_setup_slack_.clear();
 
-    for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) {
+    for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) {
         ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin);
 
         //Some atom pins correspond to connections which are completely
diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h
index 852c1aa6297..71e144334ad 100644
--- a/vpr/src/place/timing_place.h
+++ b/vpr/src/place/timing_place.h
@@ -101,7 +101,9 @@ class PlacerCriticalities {
     typedef vtr::Range<net_iterator> net_range;
 
   public: //Lifetime
-    PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup);
+    PlacerCriticalities(const ClusteredNetlist& clb_nlist,
+                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                        std::shared_ptr<const SetupTimingInfo> timing_info);
     PlacerCriticalities(const PlacerCriticalities&) = delete;
     PlacerCriticalities& operator=(const PlacerCriticalities&) = delete;
 
@@ -125,8 +127,7 @@ class PlacerCriticalities {
      * If out of sync, then the criticalities cannot be incrementally updated on
      * during the next timing analysis iteration.
      */
-    void update_criticalities(const SetupTimingInfo* timing_info,
-                              const PlaceCritParams& crit_params,
+    void update_criticalities(const PlaceCritParams& crit_params,
                               PlacerState& placer_state);
 
     ///@bried Enable the recompute_required flag to enforce from scratch update.
@@ -151,6 +152,9 @@ class PlacerCriticalities {
     ///@brief The lookup table that maps atom pins to clb pins.
     const ClusteredPinAtomPinsLookup& pin_lookup_;
 
+    ///@brief A pointer to the setup timing analyzer
+    std::shared_ptr<const SetupTimingInfo> timing_info_;
+
     /**
      * @brief The matrix that stores criticality value for each connection.
      *
@@ -168,7 +172,7 @@ class PlacerCriticalities {
     vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_criticality_;
 
     ///@brief Incremental update. See timing_place.cpp for more.
-    void incr_update_criticalities(const SetupTimingInfo* timing_info);
+    void incr_update_criticalities();
 
     ///@brief Flag that turns on/off the update_criticalities() routine.
     bool update_enabled = true;
@@ -215,7 +219,9 @@ class PlacerSetupSlacks {
     typedef vtr::Range<net_iterator> net_range;
 
   public: //Lifetime
-    PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup);
+    PlacerSetupSlacks(const ClusteredNetlist& clb_nlist,
+                      const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                      std::shared_ptr<const SetupTimingInfo> timing_info);
     PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete;
     PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete;
 
@@ -232,14 +238,14 @@ class PlacerSetupSlacks {
   public: //Modifiers
     /**
      * @brief Updates setup slacks based on the atom netlist setup slacks provided
-     *        by timing_info.
+     *        by timing_info_.
      *
      * Should consistently call this method after the most recent timing analysis to
      * keep the setup slacks stored in this class in sync with the timing analyzer.
      * If out of sync, then the setup slacks cannot be incrementally updated on
      * during the next timing analysis iteration.
      */
-    void update_setup_slacks(const SetupTimingInfo* timing_info);
+    void update_setup_slacks();
 
     ///@bried Enable the recompute_required flag to enforce from scratch update.
     void set_recompute_required() { recompute_required = true; }
@@ -256,6 +262,7 @@ class PlacerSetupSlacks {
   private: //Data
     const ClusteredNetlist& clb_nlist_;
     const ClusteredPinAtomPinsLookup& pin_lookup_;
+    std::shared_ptr<const SetupTimingInfo> timing_info_;
 
     /**
      * @brief The matrix that stores raw setup slack values for each connection.
@@ -268,7 +275,7 @@ class PlacerSetupSlacks {
     vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_setup_slack_;
 
     ///@brief Incremental update. See timing_place.cpp for more.
-    void incr_update_setup_slacks(const SetupTimingInfo* timing_info);
+    void incr_update_setup_slacks();
 
     ///@brief Incremental update. See timing_place.cpp for more.
     void recompute_setup_slacks();
diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp
index d4dbc2a4d55..08ef1892a49 100644
--- a/vpr/src/route/route.cpp
+++ b/vpr/src/route/route.cpp
@@ -202,7 +202,7 @@ bool route(const Netlist<>& net_list,
         netlist_pin_lookup,
         atom_ctx.nlist,
         atom_ctx.lookup,
-        *timing_info->timing_graph(),
+        timing_info,
         is_flat);
 
     std::unique_ptr<NetlistRouter> netlist_router = make_netlist_router(
diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h
index fcfd5607582..f996be8b64c 100644
--- a/vpr/src/route/route_net.h
+++ b/vpr/src/route/route_net.h
@@ -93,7 +93,7 @@ inline void update_net_delay_from_isink(float* net_delay,
         //Delay changed, invalidate for incremental timing update
         VTR_ASSERT_SAFE(timing_info);
         ParentPinId pin = net_list.net_pin(inet, isink);
-        pin_timing_invalidator->invalidate_connection(pin, timing_info);
+        pin_timing_invalidator->invalidate_connection(pin);
     }
 
     net_delay[isink] = new_delay;
diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h
index 754d118aef2..c76a075cb74 100644
--- a/vpr/src/timing/NetPinTimingInvalidator.h
+++ b/vpr/src/timing/NetPinTimingInvalidator.h
@@ -20,7 +20,7 @@ class NetPinTimingInvalidator {
     typedef vtr::Range<const tatum::EdgeId*> tedge_range;
     virtual ~NetPinTimingInvalidator() = default;
     virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0;
-    virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0;
+    virtual void invalidate_connection(ParentPinId /* pin */) = 0;
     virtual void reset() = 0;
 
     /**
@@ -32,12 +32,10 @@ class NetPinTimingInvalidator {
      * Invalidate all the timing graph edges associated with these connections via
      * the NetPinTimingInvalidator class.
      */
-    void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) {
-        VTR_ASSERT_SAFE(timing_info);
-
+    void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected) {
         // Invalidate timing graph edges affected by the move
         for (ClusterPinId pin : blocks_affected.affected_pins) {
-            invalidate_connection(pin, timing_info);
+            invalidate_connection(pin);
         }
     }
 };
@@ -54,15 +52,17 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator {
                                 const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup,
                                 const AtomNetlist& atom_nlist,
                                 const AtomLookup& atom_lookup,
-                                const tatum::TimingGraph& timing_graph,
-                                bool is_flat) {
+                                std::shared_ptr<TimingInfo> timing_info,
+                                bool is_flat)
+        : timing_info_(std::move(timing_info)) {
+
         size_t num_pins = net_list.pins().size();
         pin_first_edge_.reserve(num_pins + 1); //Exact
         timing_edges_.reserve(num_pins + 1);   //Lower bound
         for (ParentPinId pin_id : net_list.pins()) {
             pin_first_edge_.push_back(timing_edges_.size());
             if (is_flat) {
-                tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id));
+                tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id));
 
                 if (!tedge) {
                     continue;
@@ -73,7 +73,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator {
                 auto cluster_pin_id = convert_to_cluster_pin_id(pin_id);
                 auto atom_pins = clb_atom_pin_lookup.connected_atom_pins(cluster_pin_id);
                 for (const AtomPinId atom_pin : atom_pins) {
-                    tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, atom_pin);
+                    tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, atom_pin);
 
                     if (!tedge) {
                         continue;
@@ -101,11 +101,11 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator {
     /** Invalidates all timing edges associated with the clustered netlist connection
      * driving the specified pin.
      * Is concurrently safe. */
-    void invalidate_connection(ParentPinId pin, TimingInfo* timing_info) {
+    void invalidate_connection(ParentPinId pin) {
         if (invalidated_pins_.count(pin)) return; //Already invalidated
 
         for (tatum::EdgeId edge : pin_timing_edges(pin)) {
-            timing_info->invalidate_delay(edge);
+            timing_info_->invalidate_delay(edge);
         }
 
         invalidated_pins_.insert(pin);
@@ -146,6 +146,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator {
     }
 
   private:
+    std::shared_ptr<TimingInfo> timing_info_;
     std::vector<int> pin_first_edge_; //Indices into timing_edges corresponding
     std::vector<tatum::EdgeId> timing_edges_;
 
@@ -167,7 +168,7 @@ class NoopNetPinTimingInvalidator : public NetPinTimingInvalidator {
         return vtr::make_range((const tatum::EdgeId*)nullptr, (const tatum::EdgeId*)nullptr);
     }
 
-    void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) {
+    void invalidate_connection(ParentPinId /* pin */) {
     }
 
     void reset() {
@@ -181,12 +182,13 @@ inline std::unique_ptr<NetPinTimingInvalidator> make_net_pin_timing_invalidator(
     const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup,
     const AtomNetlist& atom_nlist,
     const AtomLookup& atom_lookup,
-    const tatum::TimingGraph& timing_graph,
+    const std::shared_ptr<TimingInfo>& timing_info,
     bool is_flat) {
     if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) {
         return std::make_unique<NoopNetPinTimingInvalidator>();
     } else {
         VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL);
-        return std::make_unique<IncrNetPinTimingInvalidator>(net_list, clb_atom_pin_lookup, atom_nlist, atom_lookup, timing_graph, is_flat);
+        return std::make_unique<IncrNetPinTimingInvalidator>(net_list, clb_atom_pin_lookup, atom_nlist,
+                                                             atom_lookup, timing_info, is_flat);
     }
 }
\ No newline at end of file
diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h
index 836c95e50d7..fbd21cbf1bc 100644
--- a/vpr/src/timing/timing_info.h
+++ b/vpr/src/timing/timing_info.h
@@ -7,7 +7,7 @@
 #include "tatum/timing_paths.hpp"
 #include "timing_util.h"
 
-//Generic inteface which provides functionality to update (but not
+//Generic interface which provides functionality to update (but not
 //access) timing information.
 //
 //This is useful for algorithms which know they need to update timing

From d28d56d5a5128b51b16974648dd73fe5ef289874 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 17:53:30 -0500
Subject: [PATCH 28/32] alloc_lookups_and_delay_model returns a unique_ptr
 instead of shared_ptr

---
 vpr/src/place/place_delay_model.cpp   |  2 +-
 vpr/src/place/place_delay_model.h     |  2 +-
 vpr/src/place/placer.cpp              |  2 +-
 vpr/src/place/placer.h                |  6 +++++-
 vpr/src/place/timing_place_lookup.cpp | 10 +++++-----
 vpr/src/place/timing_place_lookup.h   |  2 +-
 6 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 36070bf8423..4f626a5817f 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -318,7 +318,7 @@ void OverrideDelayModel::write(const std::string& file) const {
 #endif
 
 ///@brief Initialize the placer delay model.
-std::shared_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& placer_opts,
                                                                const t_router_opts& router_opts,
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index d1cd3c2164a..0aa01385e6e 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -29,7 +29,7 @@ class PlaceDelayModel;
 class PlacerState;
 
 ///@brief Initialize the placer delay model.
-std::shared_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& place_opts,
                                                                const t_router_opts& router_opts,
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index ab32b8a6890..409b1954a7b 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -281,7 +281,7 @@ void Placer::place() {
 
    bool skip_anneal = false;
 #ifdef ENABLE_ANALYTIC_PLACE
-   // When enabled, skip most of the annealing and go straight to quench
+   // Cluster-level analytic placer: when enabled, skip most of the annealing and go straight to quench
    if (placer_opts_.enable_analytic_placer) {
        skip_anneal = true;
    }
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 66692b8ca1d..3e0462a6a9a 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -79,7 +79,11 @@ class Placer {
 
     /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later.
     t_placement_checkpoint placement_checkpoint_;
-    /// It holds a setup timing analysis engine. Other placement timing object usually have a reference or pointer to timing_info.
+    /**
+     * @brief Holds a setup timing analysis engine.
+     * Other placement timing objects like PlacerSetupSlacks, PlacerCriticalities, and NetPinTimingInvalidator
+     * have a pointer to timing_info. A shared pointer is used to manage the lifetime of the object.
+     */
     std::shared_ptr<SetupTimingInfo> timing_info_;
     /// Post-clustering delay calculator. Its API allows extraction of delay for each timing edge.
     std::shared_ptr<PlacementDelayCalculator> placement_delay_calc_;
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index 873633a9c5e..86dc396e2b8 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -170,7 +170,7 @@ static float find_neighboring_average(vtr::NdMatrix<float, 4>& matrix,
 
 /******* Globally Accessible Functions **********/
 
-std::shared_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
+std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
                                                            t_det_routing_arch* det_routing_arch,
@@ -196,15 +196,15 @@ std::shared_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&
     int longest_length = get_longest_segment_length(segment_inf);
 
     /*now setup and compute the actual arrays */
-    std::shared_ptr<PlaceDelayModel> place_delay_model;
+    std::unique_ptr<PlaceDelayModel> place_delay_model;
     float min_cross_layer_delay = get_min_cross_layer_delay();
 
     if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) {
-        place_delay_model = std::make_shared<SimpleDelayModel>();
+        place_delay_model = std::make_unique<SimpleDelayModel>();
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
-        place_delay_model = std::make_shared<DeltaDelayModel>(min_cross_layer_delay, is_flat);
+        place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
-        place_delay_model = std::make_shared<OverrideDelayModel>(min_cross_layer_delay, is_flat);
+        place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
     } else {
         VTR_ASSERT_MSG(false, "Invalid placer delay model");
     }
diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h
index 14897a7fcc4..fba3f470483 100644
--- a/vpr/src/place/timing_place_lookup.h
+++ b/vpr/src/place/timing_place_lookup.h
@@ -2,7 +2,7 @@
 #define TIMING_PLACE_LOOKUP_H
 #include "place_delay_model.h"
 
-std::shared_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
+std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
                                                            t_det_routing_arch* det_routing_arch,

From ddfec83d7ae14ea9a9ccbb62aa33bcfa1e29f3fa Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 18:07:12 -0500
Subject: [PATCH 29/32] rename place_log_util to placement_log_printer

---
 vpr/src/place/{place_log_util.cpp => placement_log_printer.cpp} | 2 +-
 vpr/src/place/{place_log_util.h => placement_log_printer.h}     | 0
 vpr/src/place/placer.h                                          | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename vpr/src/place/{place_log_util.cpp => placement_log_printer.cpp} (99%)
 rename vpr/src/place/{place_log_util.h => placement_log_printer.h} (100%)

diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/placement_log_printer.cpp
similarity index 99%
rename from vpr/src/place/place_log_util.cpp
rename to vpr/src/place/placement_log_printer.cpp
index d825a3af093..a4eafb3b30f 100644
--- a/vpr/src/place/place_log_util.cpp
+++ b/vpr/src/place/placement_log_printer.cpp
@@ -1,5 +1,5 @@
 
-#include "place_log_util.h"
+#include "placement_log_printer.h"
 
 #include "vtr_log.h"
 #include "annealer.h"
diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/placement_log_printer.h
similarity index 100%
rename from vpr/src/place/place_log_util.h
rename to vpr/src/place/placement_log_printer.h
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 3e0462a6a9a..e5d23ff567c 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -26,7 +26,7 @@
 #include "placer_state.h"
 #include "noc_place_utils.h"
 #include "net_cost_handler.h"
-#include "place_log_util.h"
+#include "placement_log_printer.h"
 
 class PlacementAnnealer;
 namespace vtr{

From 1dcd63ba95e33d0c693d7df970f3dce133858fd8 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 18:09:17 -0500
Subject: [PATCH 30/32] start measuring the placement time from the moment the
 Placer object is constructed

---
 vpr/src/place/compressed_grid.cpp | 9 +++++++++
 vpr/src/place/place.cpp           | 5 +++++
 vpr/src/place/placer.cpp          | 7 +------
 vpr/src/place/placer.h            | 2 --
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/compressed_grid.cpp b/vpr/src/place/compressed_grid.cpp
index 0e78e6b99b7..33fa04cbfc1 100644
--- a/vpr/src/place/compressed_grid.cpp
+++ b/vpr/src/place/compressed_grid.cpp
@@ -1,6 +1,9 @@
+
 #include "compressed_grid.h"
+
 #include "arch_util.h"
 #include "globals.h"
+#include "vtr_time.h"
 
 /**
  * @brief Creates a compressed grid from the given locations.
@@ -16,6 +19,12 @@ static t_compressed_block_grid create_compressed_block_grid(const std::vector<st
 
 
 std::vector<t_compressed_block_grid> create_compressed_block_grids() {
+    /* Measure how long it takes to allocate and initialize the compressed grids.
+     * The measured execution time is printed when this object goes out of scope
+     * at the end of this function.
+     */
+    vtr::ScopedStartFinishTimer compressed_grid_timer("Compressed grid construction");
+
     auto& device_ctx = g_vpr_ctx.device();
     auto& grid = device_ctx.grid;
     const int num_layers = grid.get_num_layers();
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b090e46d0a5..80a3b6edc24 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -93,6 +93,11 @@ void try_place(const Netlist<>& net_list,
     place_ctx.lock_loc_vars();
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 
+    /* Start measuring placement time. The measured execution time will be printed
+     * when this object goes out of scope at the end of this function.
+     */
+    vtr::ScopedStartFinishTimer placement_timer("Placement");
+
     Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false);
 
     placer.place();
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 409b1954a7b..cc3bd20f0c6 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -39,8 +39,7 @@ Placer::Placer(const Netlist<>& net_list,
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& atom_ctx = g_vpr_ctx.atom();
 
-    const auto& timing_ctx = g_vpr_ctx.timing();
-    pre_place_timing_stats_ = timing_ctx.stats;
+    pre_place_timing_stats_ = g_vpr_ctx.timing().stats;
 
     init_placement_context(placer_state_.mutable_blk_loc_registry(), directs);
 
@@ -49,10 +48,6 @@ Placer::Placer(const Netlist<>& net_list,
         noc_cost_handler_.emplace(placer_state_.block_locs());
     }
 
-    // Start measuring placement time
-    timer_ = std::make_unique<vtr::ScopedStartFinishTimer>("Placement");
-    timer_->quiet(quiet);
-
     /* To make sure the importance of NoC-related cost terms compared to
      * BB and timing cost is determine only through NoC placement weighting factor,
      * we normalize NoC-related cost weighting factors so that they add up to 1.
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index e5d23ff567c..dbbe7466e7a 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -96,8 +96,6 @@ class Placer {
     /// Stores information about the critical path. This is usually updated after that timing info is updated.
     tatum::TimingPathInfo critical_path_;
 
-    std::unique_ptr<vtr::ScopedStartFinishTimer> timer_;
-
     IntraLbPbPinLookup pb_gpin_lookup_;
     ClusteredPinAtomPinsLookup netlist_pin_lookup_;
 

From be433121c0b708f24724276ae425edc509d02744 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 27 Nov 2024 18:18:28 -0500
Subject: [PATCH 31/32] move the construction of pb_gpin_lookup and
 netlist_pin_lookup to try_place

---
 vpr/src/place/place.cpp  | 10 +++++++++-
 vpr/src/place/placer.cpp | 11 ++++-------
 vpr/src/place/placer.h   |  9 ++++++---
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 80a3b6edc24..3506d00b801 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -55,6 +55,8 @@ void try_place(const Netlist<>& net_list,
      */
     VTR_ASSERT(!is_flat);
     const auto& device_ctx = g_vpr_ctx.device();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& atom_ctx = g_vpr_ctx.atom();
 
     /* Placement delay model is independent of the placement and can be shared across
      * multiple placers if we are performing parallel annealing.
@@ -98,7 +100,13 @@ void try_place(const Netlist<>& net_list,
      */
     vtr::ScopedStartFinishTimer placement_timer("Placement");
 
-    Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false);
+    // Enables fast look-up of pb graph pins from block pin indices
+    IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types);
+    // Enables fast look-up of atom pins connected to CLB pins
+    ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup);
+
+    Placer placer(net_list, placer_opts, analysis_opts, noc_opts, pb_gpin_lookup, netlist_pin_lookup,
+                  directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false);
 
     placer.place();
 
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index cc3bd20f0c6..37b48f11d0d 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -20,6 +20,8 @@ Placer::Placer(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
                const t_analysis_opts& analysis_opts,
                const t_noc_opts& noc_opts,
+               const IntraLbPbPinLookup& pb_gpin_lookup,
+               const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
                const std::vector<t_direct_inf>& directs,
                std::shared_ptr<PlaceDelayModel> place_delay_model,
                bool cube_bb,
@@ -28,6 +30,8 @@ Placer::Placer(const Netlist<>& net_list,
     : placer_opts_(placer_opts)
     , analysis_opts_(analysis_opts)
     , noc_opts_(noc_opts)
+    , pb_gpin_lookup_(pb_gpin_lookup)
+    , netlist_pin_lookup_(netlist_pin_lookup)
     , costs_(placer_opts.place_algorithm, noc_opts.noc)
     , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb)
     , rng_(placer_opts.seed)
@@ -36,8 +40,6 @@ Placer::Placer(const Netlist<>& net_list,
     , log_printer_(*this, quiet)
     , is_flat_(is_flat) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& atom_ctx = g_vpr_ctx.atom();
 
     pre_place_timing_stats_ = g_vpr_ctx.timing().stats;
 
@@ -102,11 +104,6 @@ Placer::Placer(const Netlist<>& net_list,
        init_draw_coords((float)width_fac, placer_state_.blk_loc_registry());
    }
 
-   // Allocate here because it goes into timing critical code where each memory allocation is expensive
-   pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types);
-   // Enables fast look-up of atom pins connect to CLB pins
-   netlist_pin_lookup_ = ClusteredPinAtomPinsLookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup_);
-
    // Gets initial cost and loads bounding boxes.
    costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL);
    costs_.bb_cost_norm = 1 / costs_.bb_cost;
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index dbbe7466e7a..086630e2c09 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -39,6 +39,8 @@ class Placer {
            const t_placer_opts& placer_opts,
            const t_analysis_opts& analysis_opts,
            const t_noc_opts& noc_opts,
+           const IntraLbPbPinLookup& pb_gpin_lookup,
+           const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
            const std::vector<t_direct_inf>& directs,
            std::shared_ptr<PlaceDelayModel> place_delay_model,
            bool cube_bb,
@@ -60,6 +62,10 @@ class Placer {
     const t_analysis_opts& analysis_opts_;
     /// Holds NoC-related parameters
     const t_noc_opts& noc_opts_;
+    /// Enables fast look-up of pb graph pins from block pin indices
+    const IntraLbPbPinLookup& pb_gpin_lookup_;
+    /// Enables fast look-up of atom pins connected to CLB pins
+    const ClusteredPinAtomPinsLookup& netlist_pin_lookup_;
     /// Placement cost terms with their normalization factors and total cost
     t_placer_costs costs_;
     /// Holds timing, runtime, and block location information
@@ -96,9 +102,6 @@ class Placer {
     /// Stores information about the critical path. This is usually updated after that timing info is updated.
     tatum::TimingPathInfo critical_path_;
 
-    IntraLbPbPinLookup pb_gpin_lookup_;
-    ClusteredPinAtomPinsLookup netlist_pin_lookup_;
-
     /// Performs random swaps and implements the simulated annealer optimizer.
     std::unique_ptr<PlacementAnnealer> annealer_;
 

From 6905b3ecd0c2b0b99b0a51e76f3977e81cd0ece2 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 28 Nov 2024 15:20:20 -0500
Subject: [PATCH 32/32] add more comments

---
 vpr/src/place/placement_log_printer.h | 50 ++++++++++++++++++++-------
 vpr/src/place/placer.h                | 13 ++++++-
 2 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/vpr/src/place/placement_log_printer.h b/vpr/src/place/placement_log_printer.h
index 8c437a922fa..d538c20d895 100644
--- a/vpr/src/place/placement_log_printer.h
+++ b/vpr/src/place/placement_log_printer.h
@@ -2,17 +2,9 @@
  * @file placement_log_printer.h
  * @brief Declares the PlacementLogPrinter class and associated utilities for logging
  * and reporting placement-related statistics and timing analysis results.
- *
- * This file provides tools to monitor and report the progress and results of the placement stage.
- *
- * ### Key Components:
- * - **PlacementLogPrinter**:
- *   - A utility class for logging placement status, resource utilization, and swap statistics.
- *   - Prints detailed statistics during the placement process, including initial and post-placement states.
- *   - Supports a "quiet mode" to suppress output.
- *
+ *
  * ### Integration:
- * The tools in this file integrate with the Placer class to provide information about
+ * The PlacementLogPrinter class integrates with the Placer class to provide information about
  * the placement process for debugging, optimization, and analysis purposes.
  */
 
@@ -33,20 +25,54 @@ struct t_swap_stats;
 class BlkLocRegistry;
 class Placer;
 
+/**
+ * @class PlacementLogPrinter
+ * @brief A utility class for logging placement status and
+ * updating the screen view when graphics are enabled.
+ */
 class PlacementLogPrinter {
   public:
-    explicit PlacementLogPrinter(const Placer& placer, bool quiet);
+    /**
+     * @param placer The placer object from which the placement status is retrieved.
+     * @param quiet When set true, the logger doesn't print any information.
+     */
+    PlacementLogPrinter(const Placer& placer,
+                        bool quiet);
 
+    /**
+     * @brief Prints the placement status header that shows which metrics are reported
+     * in each iteration of the annealer's outer loop.
+     * @details This method should be called once before the first call to print_place_status().
+     */
     void print_place_status_header() const;
+
+    /**
+     * @brief Print placement metrics and elapsed time after each outer loop iteration of the annealer.
+     * If graphics are on, the function will also update the screen view.
+     * @param elapsed_sec Time spent in the latest outer loop iteration.
+     */
+    void print_place_status(float elapsed_sec) const;
+
+    /// Reports the resource utilization for each block type.
     void print_resources_utilization() const;
+    /// Reports the number of tried temperatures, total swaps, and how many were accepted or rejected.
     void print_placement_swaps_stats() const;
-    void print_place_status(float elapsed_sec) const;
+    /// Reports placement metrics after the initial placement.
     void print_initial_placement_stats() const;
+    /// Prints final placement metrics and generates timing reports.
     void print_post_placement_stats() const;
 
   private:
+    /**
+     * @brief A constant reference to the Placer object to access the placement status.
+     * @details PlacementLogPrinter is a friend class for the Placer class, so it can
+     * access all its private data members. This reference is made constant to avoid
+     * any accidental modification of the Placer object.
+     */
     const Placer& placer_;
+    /// Specifies whether this object prints logs and updates the graphics.
     const bool quiet_;
+    /// A string buffer to carry the message to be shown in the graphical interface.
     mutable std::vector<char> msg_;
 };
 
diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h
index 086630e2c09..99c00d7e8e5 100644
--- a/vpr/src/place/placer.h
+++ b/vpr/src/place/placer.h
@@ -1,7 +1,7 @@
 /**
  * @file placer.h
  * @brief Declares the Placer class, which encapsulates the functionality, data structures,
- * and algorithms required for the placement stage.
+ * and algorithms required for the (annealing-based) placement stage.
  *
  * The Placer class initializes necessary objects, performs an initial placement,
  * and runs simulated annealing optimization. This optimization minimizes
@@ -47,6 +47,17 @@ class Placer {
            bool is_flat,
            bool quiet);
 
+    /**
+     * @brief Executes the simulated annealing algorithm to optimize placement.
+     *
+     * This function minimizes placement costs, including bounding box and timing costs,
+     * using simulated annealing. During the process, it periodically updates timing information
+     * and saves a checkpoint of the best placement encountered.
+     *
+     * After the simulated annealing completes, the final placement is evaluated against the
+     * checkpoint. If the final placement's quality is worse than the checkpoint, the checkpoint
+     * is restored. The final placement is then validated for legality.
+     */
     void place();
 
     /**