From 508206c558ae93a07cbfc7dfd963ee3f29f02411 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 14:11:34 -0500
Subject: [PATCH 01/41] Avoid passing place_ctx.block_locs as argument in a
 function call hierarchy.

---
 vpr/src/place/noc_place_utils.cpp | 30 +++++++++++++-----------------
 vpr/src/place/noc_place_utils.h   | 12 +++---------
 2 files changed, 16 insertions(+), 26 deletions(-)
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 24745755123..f982329fbf3 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -22,9 +22,6 @@ static std::vector<NocTrafficFlowId> affected_traffic_flows;
 static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type);
 
 void initial_noc_routing(void) {
-    // need to get placement information about where the router cluster blocks are placed on the device
-    const auto& place_ctx = g_vpr_ctx.placement();
-
     // need to update the link usages within after routing all the traffic flows
     // also need to route all the traffic flows and store them
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
@@ -40,7 +37,7 @@ void initial_noc_routing(void) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
 
         // update the traffic flow route based on where the router cluster blocks are placed
-        std::vector<NocLinkId>& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, *noc_traffic_flows_storage, *noc_ctx.noc_flows_router, place_ctx.block_locs);
+        std::vector<NocLinkId>& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, *noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
 
         // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed
         update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
@@ -66,8 +63,6 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
 }
 
 void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts) {
-    // provides the positions where the affected blocks have moved to
-    auto& place_ctx = g_vpr_ctx.placement();
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
@@ -85,7 +80,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         // check if the current moved block is a noc router
         if (noc_traffic_flows_storage->check_if_cluster_block_has_traffic_flows(blk)) {
             // current block is a router, so re-route all the traffic flows it is a part of
-            re_route_associated_traffic_flows(blk, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, place_ctx.block_locs, updated_traffic_flows);
+            re_route_associated_traffic_flows(blk, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows);
         }
     }
 
@@ -118,7 +113,10 @@ void commit_noc_costs() {
     return;
 }
 
-std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations) {
+std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router) {
+    // provides the positions where the affected blocks have moved to
+    auto& place_ctx = g_vpr_ctx.placement();
+
     // get the traffic flow with the current id
     const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
@@ -127,8 +125,8 @@ std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id,
     ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id;
 
     // get the ids of the hard router blocks where the logical router cluster blocks have been placed
-    NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_source_router_block_id].loc);
-    NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_sink_router_block_id].loc);
+    NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_source_router_block_id].loc);
+    NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_sink_router_block_id].loc);
 
     // route the current traffic flow
     std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id);
@@ -153,7 +151,7 @@ void update_traffic_flow_link_usage(const std::vector<NocLinkId>& traffic_flow_r
     return;
 }
 
-void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows) {
+void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows) {
     // get all the associated traffic flows for the logical router cluster block
     const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id);
 
@@ -164,7 +162,7 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc
             // first check to see whether we have already re-routed the current traffic flow and only re-route it if we haven't already.
             if (updated_traffic_flows.find(traffic_flow_id) == updated_traffic_flows.end()) {
                 // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers
-                re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router, placed_cluster_block_locations);
+                re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router);
 
                 // now make sure we don't update this traffic flow a second time by adding it to the group of updated traffic flows
                 updated_traffic_flows.insert(traffic_flow_id);
@@ -179,8 +177,6 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc
 }
 
 void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected) {
-    // provides the positions where the affected blocks have moved to
-    auto& place_ctx = g_vpr_ctx.placement();
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
@@ -207,7 +203,7 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
                     // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already.
                     if (reverted_traffic_flows.find(traffic_flow_id) == reverted_traffic_flows.end()) {
                         // Revert the traffic flow route by re-routing it
-                        re_route_traffic_flow(traffic_flow_id, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, place_ctx.block_locs);
+                        re_route_traffic_flow(traffic_flow_id, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router);
 
                         // make sure we do not revert this traffic flow again
                         reverted_traffic_flows.insert(traffic_flow_id);
@@ -220,7 +216,7 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
     return;
 }
 
-void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations) {
+void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router) {
     // get the current traffic flow info
     const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
@@ -233,7 +229,7 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& no
     update_traffic_flow_link_usage(curr_traffic_flow_route, noc_model, -1, curr_traffic_flow.traffic_flow_bandwidth);
 
     // now get the re-routed traffic flow route and increment all the link usages with this reverted route
-    std::vector<NocLinkId>& re_routed_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router, placed_cluster_block_locations);
+    std::vector<NocLinkId>& re_routed_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router);
     update_traffic_flow_link_usage(re_routed_traffic_flow_route, noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
 
     return;
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index 5dbaed43f8f..a8e27654081 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -139,11 +139,9 @@ void commit_noc_costs();
  * within the NoC. Used to get the current traffic flow information.
  * @param noc_flows_router The packet routing algorithm used to route traffic
  * flows within the NoC.
- * @param placed_cluster_block_locations A datastructure that identifies the
- * placed grid locations of all cluster blocks.
  * @return std::vector<NocLinkId>& The found route for the traffic flow.
  */
-std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations);
+std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router);
 
 /**
  * @brief Updates the bandwidth usages of links found in a routed traffic flow.
@@ -188,12 +186,10 @@ void update_traffic_flow_link_usage(const std::vector<NocLinkId>& traffic_flow_r
  * to route traffic flows within the NoC.  
  * @param noc_flows_router The packet routing algorithm used to route traffic
  * flows within the NoC.
- * @param placed_cluster_block_locations A datastructure that identifies the
- * placed grid locations of all cluster blocks.
  * @param updated_traffic_flows Keeps track of traffic flows that have been
  * re-routed. Used to prevent re-routing the same traffic flow multiple times.
  */
-void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows);
+void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows);
 
 /**
  * @brief Used to re-route all the traffic flows associated to logical
@@ -223,10 +219,8 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
  * to route traffic flows within the NoC.
  * @param noc_flows_router The packet routing algorithm used to route traffic
  * flows within the NoC.
- * @param placed_cluster_block_locations A datastructure that identifies the
- * placed grid locations of all cluster blocks.
  */
-void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations);
+void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router);
 
 /**
  * @brief Recompute the NoC costs (aggregate bandwidth and latency) by

From 0bb3ffc8587dd4acc887a3e1521f2ceb27683c02 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 14:20:52 -0500
Subject: [PATCH 02/41] Add bandwidth and congestion to NoCLink

---
 vpr/src/noc/noc_link.cpp    | 34 +++++++++++++++++++++++++++++-----
 vpr/src/noc/noc_link.h      | 11 ++++++++++-
 vpr/src/noc/noc_storage.cpp |  3 ++-
 vpr/src/noc/noc_storage.h   |  4 ++--
 4 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/vpr/src/noc/noc_link.cpp b/vpr/src/noc/noc_link.cpp
index de15d5a4a6d..855473799aa 100644
--- a/vpr/src/noc/noc_link.cpp
+++ b/vpr/src/noc/noc_link.cpp
@@ -1,12 +1,11 @@
 #include "noc_link.h"
 
 // constructor
-NocLink::NocLink(NocRouterId source, NocRouterId sink)
+NocLink::NocLink(NocRouterId source, NocRouterId sink, double bw)
     : source_router(source)
-    , sink_router(sink) {
-    // initialize variables
-    bandwidth_usage = 0.0;
-}
+    , sink_router(sink)
+    , bandwidth_usage(0.0)
+    , bandwidth(bw) { }
 
 // getters
 NocRouterId NocLink::get_source_router(void) const {
@@ -34,4 +33,29 @@ void NocLink::set_sink_router(NocRouterId sink) {
 
 void NocLink::set_bandwidth_usage(double new_bandwidth_usage) {
     bandwidth_usage = new_bandwidth_usage;
+}
+
+void NocLink::set_bandwidth(double new_bandwidth) {
+    bandwidth = new_bandwidth;
+    return;
+}
+
+double NocLink::get_bandwidth() const {
+    return bandwidth;
+}
+
+double NocLink::get_congested_bandwidth() const {
+    double congested_bandwidth = bandwidth_usage - bandwidth;
+    congested_bandwidth = std::max(congested_bandwidth, 0.0);
+
+    VTR_ASSERT(congested_bandwidth >= 0.0);
+    return congested_bandwidth;
+}
+
+double NocLink::get_congested_bandwidth_ratio() const {
+    double congested_bw = get_congested_bandwidth();
+    double congested_bw_ratio = congested_bw / get_bandwidth();
+
+    VTR_ASSERT(congested_bw_ratio >= 0.0);
+    return congested_bw_ratio;
 }
\ No newline at end of file
diff --git a/vpr/src/noc/noc_link.h b/vpr/src/noc/noc_link.h
index dee19cc676b..3e7314d21ed 100644
--- a/vpr/src/noc/noc_link.h
+++ b/vpr/src/noc/noc_link.h
@@ -48,9 +48,10 @@ class NocLink {
     NocRouterId sink_router;   /*!< The router which uses this link as an incoming edge*/
 
     double bandwidth_usage; /*!< Represents the bandwidth of the data being transmitted on the link. Units in bits-per-second(bps)*/
+    double bandwidth; /*!< Represents the maximum bits per second that can be transmitted over the link without causing congestion*/
 
   public:
-    NocLink(NocRouterId source_router, NocRouterId sink_router);
+    NocLink(NocRouterId source_router, NocRouterId sink_router, double bw);
 
     // getters
 
@@ -97,6 +98,14 @@ class NocLink {
      * @param new_bandwidth_usage The new value of the bandwidth of the link
      */
     void set_bandwidth_usage(double new_bandwidth_usage);
+
+    void set_bandwidth(double new_bandwidth);
+
+    double get_bandwidth() const;
+
+    double get_congested_bandwidth() const;
+
+    double get_congested_bandwidth_ratio() const;
 };
 
 #endif
\ No newline at end of file
diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index 70c92878f82..d4c717b3971 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -100,7 +100,8 @@ void NocStorage::add_router(int id, int grid_position_x, int grid_posistion_y, i
 
 void NocStorage::add_link(NocRouterId source, NocRouterId sink) {
     VTR_ASSERT_MSG(!built_noc, "NoC already built, cannot modify further.");
-    link_storage.emplace_back(source, sink);
+    double link_bandwidth = get_noc_link_bandwidth();
+    link_storage.emplace_back(source, sink, link_bandwidth);
 
     // the newly added link was added to the back of the list, so we can get the id as the last element in the list
     NocLinkId added_link_id((int)link_storage.size() - 1);
diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index f35f0121eb2..0749def9de6 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -24,13 +24,13 @@
  * 
  * Link
  * ----
- * A link is a component of the NoC ans is defined by the
+ * A link is a component of the NoC and is defined by the
  * NocLink class. Links are connections between two routers.
  * Links are used by routers to communicate with other routers
  * in the NoC. They can be thought of as edges in a graph. Links
  * have a source router where they exit from and sink router where
  * they enter. It is important to note that the links are not
- * unidirectional, the legal way to traverse a link is from the
+ * bi-directional, the legal way to traverse a link is from the
  * source router of the link to the sink router.
  * 
  */

From 60a740b034eca8bb8598dd50679b8a1865c0ad26 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 14:22:45 -0500
Subject: [PATCH 03/41] Replaced pointers to
 g_vpr_ctx.noc().noc_traffic_flows_storage with reference.

---
 vpr/src/place/noc_place_utils.cpp | 54 ++++++++++++++++---------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index f982329fbf3..59e10ebaac1 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -26,18 +26,18 @@ void initial_noc_routing(void) {
     // also need to route all the traffic flows and store them
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
-    NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     /* We need all the traffic flow ids to be able to access them. The range
      * of traffic flow ids go from 0 to the total number of traffic flows within
      * the NoC.
      * go through all the traffic flows and route them. Then once routed, update the links used in the routed traffic flows with their usages
      */
-    for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
+    for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) {
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // update the traffic flow route based on where the router cluster blocks are placed
-        std::vector<NocLinkId>& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, *noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
+        std::vector<NocLinkId>& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
 
         // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed
         update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
@@ -63,9 +63,11 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
 }
 
 void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts) {
+    VTR_ASSERT_SAFE(noc_aggregate_bandwidth_delta_c == 0.);
+    VTR_ASSERT_SAFE(noc_latency_delta_c == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
-    NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     // keeps track of traffic flows that have been re-routed
     // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice.
@@ -78,19 +80,19 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
 
         // check if the current moved block is a noc router
-        if (noc_traffic_flows_storage->check_if_cluster_block_has_traffic_flows(blk)) {
+        if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) {
             // current block is a router, so re-route all the traffic flows it is a part of
-            re_route_associated_traffic_flows(blk, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows);
+            re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows);
         }
     }
 
     // go through all the affected traffic flows and calculate their new costs after being re-routed, then determine the change in cost before the traffic flows were modified
     for (auto& traffic_flow_id : affected_traffic_flows) {
         // get the traffic flow route
-        const std::vector<NocLinkId>& traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id);
+        const std::vector<NocLinkId>& traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
         // get the current traffic flow info
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
         proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
@@ -179,7 +181,7 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc
 void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected) {
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
-    NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     // keeps track of traffic flows that have been reverted
     // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice.
@@ -190,11 +192,11 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
         ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
 
         // check if the current moved block is a noc router
-        if (noc_traffic_flows_storage->check_if_cluster_block_has_traffic_flows(blk)) {
+        if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) {
             // current block is a router, so re-route all the traffic flows it is a part of //
 
             // get all the associated traffic flows for the logical router cluster block
-            const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage->get_traffic_flows_associated_to_router_block(blk);
+            const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(blk);
 
             // now check if there are any associated traffic flows
             if (assoc_traffic_flows->size() != 0) {
@@ -203,7 +205,7 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
                     // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already.
                     if (reverted_traffic_flows.find(traffic_flow_id) == reverted_traffic_flows.end()) {
                         // Revert the traffic flow route by re-routing it
-                        re_route_traffic_flow(traffic_flow_id, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router);
+                        re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router);
 
                         // make sure we do not revert this traffic flow again
                         reverted_traffic_flows.insert(traffic_flow_id);
@@ -267,15 +269,15 @@ double comp_noc_aggregate_bandwidth_cost(void) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
     // datastructure that stores all the traffic flow routes
-    const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     double noc_aggregate_bandwidth_cost = 0.;
 
     // now go through each traffic flow route and calculate its
     // aggregate bandwidth. Then store this in local data structures and accumulate it.
     for (const auto& traffic_flow_id : g_vpr_ctx.noc().noc_traffic_flows_storage.get_all_traffic_flow_id()) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
-        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id);
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
+        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
         double curr_traffic_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(curr_traffic_flow_route, curr_traffic_flow);
 
@@ -293,15 +295,15 @@ double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
     // datastructure that stores all the traffic flow routes
-    const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     double noc_latency_cost = 0.;
 
     // now go through each traffic flow route and calculate its
     // latency. Then store this in local data structures and accumulate it.
     for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
-        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id);
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
+        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
         double curr_traffic_flow_latency_cost = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
 
@@ -326,7 +328,7 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
 
     auto& noc_ctx = g_vpr_ctx.noc();
     const NocStorage* noc_model = &noc_ctx.noc_model;
-    const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     // need to create a temporary noc routing algorithm
     NocRoutingAlgorithmCreator routing_algorithm_factory;
@@ -336,9 +338,9 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
     std::vector<NocLinkId> temp_found_noc_route;
 
     // go through all the traffic flows and find a route for them based on where the routers are placed within the NoC
-    for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) {
+    for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) {
         // get the traffic flow with the current id
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // get the source and destination logical router blocks in the current traffic flow
         ClusterBlockId logical_source_router_block_id = curr_traffic_flow.source_router_cluster_id;
@@ -417,14 +419,14 @@ int get_number_of_traffic_flows_with_latency_cons_met(void) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
     // datastructure that stores all the traffic flow routes
-    const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage;
+    const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
     int count_of_achieved_latency_cons = 0;
 
     // now go through each traffic flow route and check if its latency constraint was met
-    for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id);
-        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id);
+    for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) {
+        const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
+        const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
         // there will always be one more router than links in a traffic flow
         int num_of_links_in_traffic_flow = curr_traffic_flow_route.size();

From 1519e60fcc8d0c5b4a594ea90b59ec9c1f6c0fe3 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 14:59:13 -0500
Subject: [PATCH 04/41] compute NoC congestion cost difference for router swap

---
 vpr/src/base/vpr_types.h                |  1 +
 vpr/src/noc/noc_link.cpp                | 13 +++-
 vpr/src/noc/noc_link.h                  |  8 ++-
 vpr/src/noc/noc_storage.cpp             |  9 ++-
 vpr/src/place/initial_noc_placement.cpp |  4 +-
 vpr/src/place/noc_place_utils.cpp       | 96 +++++++++++++++++++++++--
 vpr/src/place/noc_place_utils.h         |  9 ++-
 vpr/src/place/place.cpp                 |  4 +-
 vpr/src/place/place_util.h              |  2 +
 vpr/test/test_noc_place_utils.cpp       |  3 +-
 10 files changed, 135 insertions(+), 14 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index d2f86c0af47..2965dc30856 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1498,6 +1498,7 @@ struct t_noc_opts {
     double noc_placement_weighting;           ///<controls the significance of the NoC placement cost relative to the total placement cost range:[0-inf)
     double noc_latency_constraints_weighting; ///<controls the significance of meeting the traffic flow contraints range:[0-inf)
     double noc_latency_weighting;             ///<controls the significance of the traffic flow latencies relative to the other NoC placement costs range:[0-inf)
+    double noc_congestion_weighting;           ///<controls the significance of the link congestions relative to the other NoC placement costs range:[0-inf)
     int noc_swap_percentage;                  ///<controls the number of NoC router block swap attemps relative to the total number of swaps attempted by the placer range:[0-100]
     std::string noc_placement_file_name;      ///<is the name of the output file that contains the NoC placement information
 };
diff --git a/vpr/src/noc/noc_link.cpp b/vpr/src/noc/noc_link.cpp
index 855473799aa..4407642ddae 100644
--- a/vpr/src/noc/noc_link.cpp
+++ b/vpr/src/noc/noc_link.cpp
@@ -1,8 +1,9 @@
 #include "noc_link.h"
 
 // constructor
-NocLink::NocLink(NocRouterId source, NocRouterId sink, double bw)
-    : source_router(source)
+NocLink::NocLink(NocLinkId link_id, NocRouterId source, NocRouterId sink, double bw)
+    : id(link_id)
+    , source_router(source)
     , sink_router(sink)
     , bandwidth_usage(0.0)
     , bandwidth(bw) { }
@@ -58,4 +59,12 @@ double NocLink::get_congested_bandwidth_ratio() const {
 
     VTR_ASSERT(congested_bw_ratio >= 0.0);
     return congested_bw_ratio;
+}
+
+NocLinkId NocLink::get_link_id() const {
+    return id;
+}
+
+NocLink::operator NocLinkId() const {
+    return get_link_id();
 }
\ No newline at end of file
diff --git a/vpr/src/noc/noc_link.h b/vpr/src/noc/noc_link.h
index 3e7314d21ed..244fe7b6959 100644
--- a/vpr/src/noc/noc_link.h
+++ b/vpr/src/noc/noc_link.h
@@ -43,6 +43,8 @@
 
 class NocLink {
   private:
+    NocLinkId id;
+
     // the two routers that are connected by this link
     NocRouterId source_router; /*!< The router which uses this link as an outgoing edge*/
     NocRouterId sink_router;   /*!< The router which uses this link as an incoming edge*/
@@ -51,7 +53,7 @@ class NocLink {
     double bandwidth; /*!< Represents the maximum bits per second that can be transmitted over the link without causing congestion*/
 
   public:
-    NocLink(NocRouterId source_router, NocRouterId sink_router, double bw);
+    NocLink(NocLinkId link_id, NocRouterId source_router, NocRouterId sink_router, double bw);
 
     // getters
 
@@ -106,6 +108,10 @@ class NocLink {
     double get_congested_bandwidth() const;
 
     double get_congested_bandwidth_ratio() const;
+
+    NocLinkId get_link_id() const;
+
+    operator NocLinkId() const;
 };
 
 #endif
\ No newline at end of file
diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index d4c717b3971..0bf1a80cf78 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -100,11 +100,14 @@ void NocStorage::add_router(int id, int grid_position_x, int grid_posistion_y, i
 
 void NocStorage::add_link(NocRouterId source, NocRouterId sink) {
     VTR_ASSERT_MSG(!built_noc, "NoC already built, cannot modify further.");
+
+    // the new link will be added to the back of the list,
+    // so we can use the total number of links added so far as id
+    NocLinkId added_link_id((int)link_storage.size());
+
     double link_bandwidth = get_noc_link_bandwidth();
-    link_storage.emplace_back(source, sink, link_bandwidth);
+    link_storage.emplace_back(added_link_id, source, sink, link_bandwidth);
 
-    // the newly added link was added to the back of the list, so we can get the id as the last element in the list
-    NocLinkId added_link_id((int)link_storage.size() - 1);
     router_link_list[source].push_back(added_link_id);
 
     return;
diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 55d3c6296d1..67351fcdb91 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -160,6 +160,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     // Initialize NoC-related costs
     costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
     costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
+    costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
     update_noc_normalization_factors(costs);
     costs.cost = calculate_noc_cost(costs, noc_opts);
 
@@ -213,7 +214,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
 
             double noc_aggregate_bandwidth_delta_c = 0.0;
             double noc_latency_delta_c = 0.0;
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts);
+            double noc_congestion_delta_c = 0.0;
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_congestion_delta_c, noc_opts);
             double delta_cost = (noc_opts.noc_placement_weighting) * (noc_latency_delta_c * costs.noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm);
 
             double prob = starting_prob - i_move * prob_step;
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 59e10ebaac1..941e7514fc1 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -7,6 +7,10 @@ static vtr::vector<NocTrafficFlowId, TrafficFlowPlaceCost> traffic_flow_costs, p
 
 /* Keeps track of traffic flows that have been updated at each attempted placement move*/
 static std::vector<NocTrafficFlowId> affected_traffic_flows;
+
+static vtr::vector<NocLinkId , double> link_congestion_costs, proposed_link_congestion_costs;
+
+static std::unordered_set<NocLinkId> affected_noc_links;
 /*********************************************************** *****************************/
 
 /**
@@ -21,6 +25,8 @@ static std::vector<NocTrafficFlowId> affected_traffic_flows;
  */
 static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type);
 
+static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links, std::vector<NocLinkId>& curr_links);
+
 void initial_noc_routing(void) {
     // need to update the link usages within after routing all the traffic flows
     // also need to route all the traffic flows and store them
@@ -60,11 +66,13 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
     // Initialize traffic_flow_costs
     costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
     costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
+    costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
 }
 
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts) {
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, double& noc_congestion_delta_c, const t_noc_opts& noc_opts) {
     VTR_ASSERT_SAFE(noc_aggregate_bandwidth_delta_c == 0.);
     VTR_ASSERT_SAFE(noc_latency_delta_c == 0.);
+    VTR_ASSERT_SAFE(noc_congestion_delta_c == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
@@ -74,6 +82,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
     std::unordered_set<NocTrafficFlowId> updated_traffic_flows;
 
     affected_traffic_flows.clear();
+    affected_noc_links.clear();
 
     // go through the moved blocks and process them only if they are NoC routers
     for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; ++iblk) {
@@ -100,6 +109,12 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         noc_aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
         noc_latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
     }
+
+    for (const auto& link_id : affected_noc_links) {
+        const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
+        proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
+        noc_congestion_delta_c += proposed_link_congestion_costs[link] - link_congestion_costs[link];
+    }
 }
 
 void commit_noc_costs() {
@@ -157,18 +172,34 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc
     // get all the associated traffic flows for the logical router cluster block
     const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id);
 
+//    std::unordered_set<NocLinkId> prev_route_links, curr_route_links;
+
     // now check if there are any associated traffic flows
     if (assoc_traffic_flows != nullptr) {
         // There are traffic flows associated to the current router block so process them
         for (auto& traffic_flow_id : *assoc_traffic_flows) {
             // first check to see whether we have already re-routed the current traffic flow and only re-route it if we haven't already.
             if (updated_traffic_flows.find(traffic_flow_id) == updated_traffic_flows.end()) {
+                // get all links for this flow route before it is rerouted
+                // The returned const std::vector<NocLinkId>& is copied so that we can modify (sort) it
+                std::vector<NocLinkId> prev_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
+
                 // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers
                 re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router);
 
                 // now make sure we don't update this traffic flow a second time by adding it to the group of updated traffic flows
                 updated_traffic_flows.insert(traffic_flow_id);
 
+                // get all links for this flow route after it is rerouted
+                std::vector<NocLinkId> curr_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
+
+                // find links that appear in the old route or the new one, but not both of them
+                // these are the links whose bandwidth utilization is affected by rerouting
+                auto unique_links = find_affected_links_by_flow_reroute(prev_traffic_flow_links, curr_traffic_flow_links);
+
+                // update the static data structure to remember which links were affected by router swap
+                affected_noc_links.insert(unique_links.begin(), unique_links.end());
+
                 // update global datastructures to indicate that the current traffic flow was affected due to router cluster blocks being swapped
                 affected_traffic_flows.push_back(traffic_flow_id);
             }
@@ -256,6 +287,13 @@ void update_noc_normalization_factors(t_placer_costs& costs) {
     costs.noc_aggregate_bandwidth_cost_norm = std::min(1 / costs.noc_aggregate_bandwidth_cost, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
     costs.noc_latency_cost_norm = std::min(1 / costs.noc_latency_cost, MAX_INV_NOC_LATENCY_COST);
 
+    // to avoid division by zero
+    if (costs.noc_congestion_cost > 0.0) {
+        costs.noc_congestion_cost_norm = std::min(1 / costs.noc_congestion_cost, MAX_INV_NOC_CONGESTION_COST);
+    } else {
+        costs.noc_congestion_cost_norm = MAX_INV_NOC_CONGESTION_COST;
+    }
+
     return;
 }
 
@@ -267,7 +305,7 @@ double calculate_noc_cost(const t_placer_costs& costs, const t_noc_opts& noc_opt
 
 double comp_noc_aggregate_bandwidth_cost(void) {
     // used to get traffic flow route information
-    auto& noc_ctx = g_vpr_ctx.mutable_noc();
+    auto& noc_ctx = g_vpr_ctx.noc();
     // datastructure that stores all the traffic flow routes
     const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
@@ -293,7 +331,7 @@ double comp_noc_aggregate_bandwidth_cost(void) {
 
 double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
     // used to get traffic flow route information
-    auto& noc_ctx = g_vpr_ctx.mutable_noc();
+    auto& noc_ctx = g_vpr_ctx.noc();
     // datastructure that stores all the traffic flow routes
     const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
@@ -310,13 +348,33 @@ double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
         // store the calculated latency for the current traffic flow in local datastructures (this also initializes them)
         traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency_cost;
 
-        // accumulate the aggregate bandwidth cost
+        // accumulate the latency cost
         noc_latency_cost += curr_traffic_flow_latency_cost;
     }
 
     return noc_latency_cost;
 }
 
+double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
+    // Used to access NoC links
+    auto& noc_ctx = g_vpr_ctx.noc();
+
+    double congestion_cost = 0.;
+
+    // Iterate over all NoC links
+    for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
+        double link_congestion_cost = calculate_link_congestion_cost(link, noc_opts);
+
+        // store the congestion cost for this link in static data structures (this also initializes them)
+        link_congestion_costs[link] = link_congestion_cost;
+
+        // accumulate the congestion cost
+        congestion_cost += link_congestion_cost;
+    }
+
+    return congestion_cost;
+}
+
 int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
     int error = 0;
     double noc_aggregate_bandwidth_cost_check = 0.;
@@ -415,6 +473,15 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
     return (single_traffic_flow_latency_cost * traffic_flow_info.traffic_flow_priority);
 }
 
+double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts) {
+    double congested_bw_ratio, congestion_cost;
+
+    congested_bw_ratio = link.get_congested_bandwidth_ratio();
+    congestion_cost = noc_opts.noc_congestion_weighting * congested_bw_ratio;
+
+    return congestion_cost;
+}
+
 int get_number_of_traffic_flows_with_latency_cons_met(void) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
@@ -606,4 +673,25 @@ void write_noc_placement_file(const std::string& file_name) {
     noc_placement_file.close();
 
     return;
+}
+
+static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links, std::vector<NocLinkId>& curr_links) {
+    // Sort both link containers
+    std::sort(prev_links.begin(), prev_links.end());
+    std::sort(curr_links.begin(), curr_links.end());
+
+    // stores links that appear either in prev_links or curr_links but not both of them
+    std::vector<NocLinkId> unique_links;
+
+    // find links that are unique to prev_links
+    std::set_difference(prev_links.begin(), prev_links.end(),
+                        curr_links.begin(), curr_links.end(),
+                        std::back_inserter(unique_links));
+
+    // find links that are unique to curr_links
+    std::set_difference(curr_links.begin(), curr_links.end(),
+                        prev_links.begin(), prev_links.end(),
+                        std::back_inserter(unique_links));
+
+    return unique_links;
 }
\ No newline at end of file
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index a8e27654081..c3eda74ff0f 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -19,6 +19,9 @@ constexpr double MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST = 1.;
 // we expect the latency costs to be in the pico-second range, and we don't expect it to go lower than that. So if the latency costs go below the pico-second range we trim the normalization value to be no higher than 1/ps
 // This should be updated if the delays become lower
 constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12;
+// the congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity
+// We assume that when a link is congested, it is overloaded by at least 0.1% of its BW capacity
+constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3;
 
 // we don't expect the noc_latency cost to ever go below 1 pico second.
 // So this value represents the lowest possible latency cost.
@@ -100,7 +103,7 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
  * NoC latency cost caused by a placer move is stored
  * here.
  */
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, double& noc_congestion_delta_c, const t_noc_opts& noc_opts);
 
 /**
  * @brief Updates static datastructures found in 'noc_place_utils.cpp'
@@ -302,6 +305,8 @@ double comp_noc_aggregate_bandwidth_cost(void);
  */
 double comp_noc_latency_cost(const t_noc_opts& noc_opts);
 
+double comp_noc_congestion_cost(const t_noc_opts& noc_opts);
+
 /**
  * @brief Given a placement state the NoC costs are re-computed
  * from scratch and compared to the current NoC placement costs.
@@ -369,6 +374,8 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
  */
 double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts);
 
+double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts);
+
 /**
  * @brief Goes through all the traffic flows and determines whether the
  * latency constraints have been met for each traffic flow. 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 203d5d6cac8..ef865e06bb1 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -810,6 +810,7 @@ void try_place(const Netlist<>& net_list,
         // get the costs associated with the NoC
         costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
         costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
+        costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
 
         // initialize all the noc normalization factors
         update_noc_normalization_factors(costs);
@@ -1755,9 +1756,10 @@ static e_move_result try_swap(const t_annealing_state* state,
 
         double noc_aggregate_bandwidth_delta_c = 0; // change in the NoC aggregate bandwidth cost
         double noc_latency_delta_c = 0;             // change in the NoC latency cost
+        double noc_congestion_delta_c = 0.;
         /* Update the NoC datastructure and costs*/
         if (noc_opts.noc) {
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts);
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_congestion_delta_c, noc_opts);
 
             // Include the NoC delta costs in the total cost change for this swap
             delta_c = delta_c + noc_placement_weighting * (noc_latency_delta_c * costs->noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs->noc_aggregate_bandwidth_cost_norm);
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index cc903cf4f71..23a1fedcf15 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -55,6 +55,8 @@ class t_placer_costs {
     double noc_aggregate_bandwidth_cost_norm = 0.;
     double noc_latency_cost = 0.;
     double noc_latency_cost_norm = 0.;
+    double noc_congestion_cost = 0.;
+    double noc_congestion_cost_norm = 0.;
 
   public: //Constructor
     t_placer_costs(t_place_algorithm algo)
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index c6ba3f89c10..d75f83da088 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -774,9 +774,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
         double delta_aggr_band_cost = 0.;
         double delta_laten_cost = 0.;
+        double delta_conngest_cost = 0.;
 
         // call the test function
-        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts);
+        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_conngest_cost, noc_opts);
 
         // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
         test_noc_bandwidth_costs += delta_aggr_band_cost;

From 79b1391a2dfce52d7f4897f1439b61fae2c959ab Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 15:32:22 -0500
Subject: [PATCH 05/41] fix syntax errors in NoC tests

---
 vpr/src/noc/noc_storage.cpp       | 13 +++++++++++++
 vpr/src/noc/noc_storage.h         | 13 +++++++++++++
 vpr/test/test_noc_place_utils.cpp |  9 ++++++---
 vpr/test/test_noc_storage.cpp     |  8 +++++++-
 vpr/test/test_xy_routing.cpp      | 24 ++++++++++++++++++------
 5 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index 0bf1a80cf78..af22454765d 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -56,6 +56,19 @@ const NocLink& NocStorage::get_single_noc_link(NocLinkId id) const {
     return link_storage[id];
 }
 
+NocLinkId  NocStorage::get_single_noc_link_id(NocRouterId src_router, NocRouterId dst_router) const {
+    NocLinkId link_id = NocLinkId::INVALID();
+
+    for (const auto& link : link_storage) {
+        if (link.get_source_router() == src_router && link.get_sink_router() == dst_router) {
+            link_id = link.get_link_id();
+            break;
+        }
+    }
+
+    return link_id;
+}
+
 NocLink& NocStorage::get_single_mutable_noc_link(NocLinkId id) {
     return link_storage[id];
 }
diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index 0749def9de6..4870ea34be3 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -269,6 +269,19 @@ class NocStorage {
      */
     const NocLink& get_single_noc_link(NocLinkId id) const;
 
+    /**
+     * @brief Given source and sink router identifiers, this function
+     * finds a link connecting these routers and returns its identifier.
+     * If such a link does not exist, an invalid id is returned.
+     *
+     * @param src_router The unique router identifier for the source router.
+     * @param dst_router The unique router identifier for the destination router.
+     * @return A link identifier (NocLinkId) that connects the source router
+     * to the destination router. NocLinkId::INVALID() if such a link is not
+     * found.
+     */
+    NocLinkId  get_single_noc_link_id(NocRouterId src_router, NocRouterId dst_router) const;
+
     /**
      * @brief Given a unique link identifier, get the corresponding link
      * within the NoC. The link can be modified, so the intended use
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index d75f83da088..ec520d21c8a 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -906,9 +906,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
     double delta_aggr_band_cost = 0.;
     double delta_laten_cost = 0.;
+    double delta_cong_cost = 0.;
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_bandwidth_costs += delta_aggr_band_cost;
@@ -998,9 +999,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // reset the delta costs
     delta_aggr_band_cost = 0.;
     delta_laten_cost = 0.;
+    delta_cong_cost = 0.;
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_bandwidth_costs += delta_aggr_band_cost;
@@ -1062,9 +1064,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // reset the delta costs
     delta_aggr_band_cost = 0.;
     delta_laten_cost = 0.;
+    delta_cong_cost = 0.;
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_bandwidth_costs += delta_aggr_band_cost;
diff --git a/vpr/test/test_noc_storage.cpp b/vpr/test/test_noc_storage.cpp
index 1cdc57550ad..a1255d31930 100644
--- a/vpr/test/test_noc_storage.cpp
+++ b/vpr/test/test_noc_storage.cpp
@@ -155,6 +155,8 @@ TEST_CASE("test_add_link", "[vpr_noc]") {
 
     // allocate the size for outgoing link vector for each router
     test_noc.make_room_for_noc_router_link_list();
+    // incremental counter used as NocLinkId
+    int noc_link_id_counter = 0;
 
     for (int source_router_id = 0; source_router_id < NUM_OF_ROUTERS; source_router_id++) {
         source = (NocRouterId)source_router_id;
@@ -164,8 +166,12 @@ TEST_CASE("test_add_link", "[vpr_noc]") {
 
             // makes sure we do not create a link for a router who acts as a sink and source
             if (source_router_id != sink_router_id) {
+                // converting the counter to link index
+                link_id = (NocLinkId)noc_link_id_counter;
+                noc_link_id_counter++;
+
                 // add link to the golden reference
-                golden_set.emplace_back(source, sink);
+                golden_set.emplace_back(link_id, source, sink, 0.0);
 
                 // add the link to the NoC
                 test_noc.add_link(source, sink);
diff --git a/vpr/test/test_xy_routing.cpp b/vpr/test/test_xy_routing.cpp
index 67517271f43..49b58662ca2 100644
--- a/vpr/test/test_xy_routing.cpp
+++ b/vpr/test/test_xy_routing.cpp
@@ -109,7 +109,9 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
         std::vector<NocLink> golden_path;
 
         for (int current_router = 7; current_router != 4; current_router--) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router - 1)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router - 1));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // store the route found by the algorithm
@@ -131,7 +133,9 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
         std::vector<NocLink> golden_path;
 
         for (int current_row = 0; current_row < 3; current_row++) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_row * 4 + 2), NocRouterId((current_row + 1) * 4 + 2)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4 + 2), NocRouterId((current_row + 1) * 4 + 2));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // store the route found by the algorithm
@@ -154,12 +158,16 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
 
         // generate the horizontal path first
         for (int current_router = 3; current_router != 0; current_router--) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router - 1)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router - 1));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // generate the vertical path next
         for (int current_row = 0; current_row < 3; current_row++) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_row * 4), NocRouterId((current_row + 1) * 4)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4), NocRouterId((current_row + 1) * 4));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // store the route found by the algorithm
@@ -185,12 +193,16 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
 
         // generate the horizontal path first
         for (int current_router = 12; current_router != 15; current_router++) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router + 1)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router + 1));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // generate the vertical path next
         for (int current_row = 3; current_row > 0; current_row--) {
-            golden_path.emplace_back(NocLink(NocRouterId(current_row * 4 + 3), NocRouterId((current_row - 1) * 4 + 3)));
+            NocLinkId  link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4 + 3), NocRouterId((current_row - 1) * 4 + 3));
+            const auto& link = noc_model.get_single_noc_link(link_id);
+            golden_path.push_back(link);
         }
 
         // store the route found by the algorithm

From 774670ae96dfd40751459a7a2c030e99e6b15e36 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 16:11:55 -0500
Subject: [PATCH 06/41] Use NocDeltaCost instead of passing 3 arguments

---
 vpr/src/place/initial_noc_placement.cpp | 12 ++++----
 vpr/src/place/noc_place_utils.cpp       | 14 ++++-----
 vpr/src/place/noc_place_utils.h         |  8 ++++-
 vpr/src/place/place.cpp                 | 13 ++++----
 vpr/test/test_noc_place_utils.cpp       | 40 ++++++++++---------------
 5 files changed, 41 insertions(+), 46 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 67351fcdb91..705a3139a4b 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -212,11 +212,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
         if (create_move_outcome != e_create_move::ABORT) {
             apply_move_blocks(blocks_affected);
 
-            double noc_aggregate_bandwidth_delta_c = 0.0;
-            double noc_latency_delta_c = 0.0;
-            double noc_congestion_delta_c = 0.0;
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_congestion_delta_c, noc_opts);
-            double delta_cost = (noc_opts.noc_placement_weighting) * (noc_latency_delta_c * costs.noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm);
+            NocDeltaCost noc_delta_c {0.0, 0.0, 0.0};
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
+            double delta_cost = (noc_opts.noc_placement_weighting) * (noc_delta_c.latency_delta_c * costs.noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm);
 
             double prob = starting_prob - i_move * prob_step;
             bool move_accepted = accept_noc_swap(delta_cost, prob);
@@ -225,8 +223,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 costs.cost += delta_cost;
                 commit_move_blocks(blocks_affected);
                 commit_noc_costs();
-                costs.noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c;
-                costs.noc_latency_cost += noc_latency_delta_c;
+                costs.noc_aggregate_bandwidth_cost += noc_delta_c.aggregate_bandwidth_delta_c;
+                costs.noc_latency_cost += noc_delta_c.latency_delta_c;
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 941e7514fc1..6595a82cfb9 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -69,10 +69,10 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
     costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
 }
 
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, double& noc_congestion_delta_c, const t_noc_opts& noc_opts) {
-    VTR_ASSERT_SAFE(noc_aggregate_bandwidth_delta_c == 0.);
-    VTR_ASSERT_SAFE(noc_latency_delta_c == 0.);
-    VTR_ASSERT_SAFE(noc_congestion_delta_c == 0.);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocDeltaCost& delta_c, const t_noc_opts& noc_opts) {
+    VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth_delta_c == 0.);
+    VTR_ASSERT_SAFE(delta_c.latency_delta_c == 0.);
+    VTR_ASSERT_SAFE(delta_c.congestion_delta_c == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
@@ -106,14 +106,14 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
         proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
 
-        noc_aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
-        noc_latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
+        delta_c.aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
+        delta_c.latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
     }
 
     for (const auto& link_id : affected_noc_links) {
         const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
         proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
-        noc_congestion_delta_c += proposed_link_congestion_costs[link] - link_congestion_costs[link];
+        delta_c.congestion_delta_c += proposed_link_congestion_costs[link] - link_congestion_costs[link];
     }
 }
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index c3eda74ff0f..2d81a4fa344 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -39,6 +39,12 @@ struct TrafficFlowPlaceCost {
     double latency = -1;
 };
 
+struct NocDeltaCost {
+    double aggregate_bandwidth_delta_c = 0.0;
+    double latency_delta_c = 0.0;
+    double congestion_delta_c = 0.0;
+};
+
 /**
  * @brief Routes all the traffic flows within the NoC and updates the link usage
  * for all links. This should be called after initial placement, where all the 
@@ -103,7 +109,7 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
  * NoC latency cost caused by a placer move is stored
  * here.
  */
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, double& noc_congestion_delta_c, const t_noc_opts& noc_opts);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocDeltaCost& delta_c, const t_noc_opts& noc_opts);
 
 /**
  * @brief Updates static datastructures found in 'noc_place_utils.cpp'
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index ef865e06bb1..dbd8b8d714b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1754,15 +1754,14 @@ static e_move_result try_swap(const t_annealing_state* state,
             delta_c = bb_delta_c * costs->bb_cost_norm;
         }
 
-        double noc_aggregate_bandwidth_delta_c = 0; // change in the NoC aggregate bandwidth cost
-        double noc_latency_delta_c = 0;             // change in the NoC latency cost
-        double noc_congestion_delta_c = 0.;
+
+        NocDeltaCost noc_delta_c {0.0, 0.0, 0.0}; // change in NoC cost
         /* Update the NoC datastructure and costs*/
         if (noc_opts.noc) {
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_congestion_delta_c, noc_opts);
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
 
             // Include the NoC delta costs in the total cost change for this swap
-            delta_c = delta_c + noc_placement_weighting * (noc_latency_delta_c * costs->noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs->noc_aggregate_bandwidth_cost_norm);
+            delta_c = delta_c + noc_placement_weighting * (noc_delta_c.latency_delta_c * costs->noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth_delta_c * costs->noc_aggregate_bandwidth_cost_norm);
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
@@ -1815,8 +1814,8 @@ static e_move_result try_swap(const t_annealing_state* state,
             if (noc_opts.noc) {
                 commit_noc_costs();
 
-                costs->noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c;
-                costs->noc_latency_cost += noc_latency_delta_c;
+                costs->noc_aggregate_bandwidth_cost += noc_delta_c.aggregate_bandwidth_delta_c;
+                costs->noc_latency_cost += noc_delta_c.latency_delta_c;
             }
 
             //Highlights the new block when manual move is selected.
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index ec520d21c8a..7a3d90eb1d9 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -772,16 +772,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
             }
         }
 
-        double delta_aggr_band_cost = 0.;
-        double delta_laten_cost = 0.;
-        double delta_conngest_cost = 0.;
+        NocDeltaCost delta_cost {0.0, 0.0, 0.0};
 
         // call the test function
-        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_conngest_cost, noc_opts);
+        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
         // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-        test_noc_bandwidth_costs += delta_aggr_band_cost;
-        test_noc_latency_costs += delta_laten_cost;
+        test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
+        test_noc_latency_costs += delta_cost.latency_delta_c;
 
         // need this function to update the local datastructures that store all the traffic flow costs
         commit_noc_costs();
@@ -904,16 +902,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
     }
 
-    double delta_aggr_band_cost = 0.;
-    double delta_laten_cost = 0.;
-    double delta_cong_cost = 0.;
+    NocDeltaCost delta_cost {0.0, 0.0, 0.0};
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_aggr_band_cost;
-    test_noc_latency_costs += delta_laten_cost;
+    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
+    test_noc_latency_costs += delta_cost.latency_delta_c;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -997,16 +993,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     }
 
     // reset the delta costs
-    delta_aggr_band_cost = 0.;
-    delta_laten_cost = 0.;
-    delta_cong_cost = 0.;
+    delta_cost = NocDeltaCost {0.0, 0.0, 0.0};
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_aggr_band_cost;
-    test_noc_latency_costs += delta_laten_cost;
+    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
+    test_noc_latency_costs += delta_cost.latency_delta_c;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1062,16 +1056,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows //
 
     // reset the delta costs
-    delta_aggr_band_cost = 0.;
-    delta_laten_cost = 0.;
-    delta_cong_cost = 0.;
+    delta_cost = NocDeltaCost {0.0, 0.0, 0.0};
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, delta_cong_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_aggr_band_cost;
-    test_noc_latency_costs += delta_laten_cost;
+    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
+    test_noc_latency_costs += delta_cost.latency_delta_c;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();

From c7e3cb6bf2ce3860e54886c76a73e52bbcbdba93 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 16:40:57 -0500
Subject: [PATCH 07/41] Add operator+=() to t_placer_costs.

I moved the NocDeltaCost declaration from noc_place_utils.h to place_util.h to resolve a cyclic dependency.
Forward declarations of NocDeltaCost and t_placer_costs did not solve the problem, as the compiler complained about GridTileLookup.
---
 vpr/src/place/initial_noc_placement.cpp |  3 +--
 vpr/src/place/noc_place_utils.h         |  6 ------
 vpr/src/place/place.cpp                 |  4 +---
 vpr/src/place/place_util.cpp            | 13 +++++++++++++
 vpr/src/place/place_util.h              | 18 ++++++++++++++++++
 5 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 705a3139a4b..1ee212eb473 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -223,8 +223,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 costs.cost += delta_cost;
                 commit_move_blocks(blocks_affected);
                 commit_noc_costs();
-                costs.noc_aggregate_bandwidth_cost += noc_delta_c.aggregate_bandwidth_delta_c;
-                costs.noc_latency_cost += noc_delta_c.latency_delta_c;
+                costs += noc_delta_c;
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index 2d81a4fa344..eae3619730f 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -39,12 +39,6 @@ struct TrafficFlowPlaceCost {
     double latency = -1;
 };
 
-struct NocDeltaCost {
-    double aggregate_bandwidth_delta_c = 0.0;
-    double latency_delta_c = 0.0;
-    double congestion_delta_c = 0.0;
-};
-
 /**
  * @brief Routes all the traffic flows within the NoC and updates the link usage
  * for all links. This should be called after initial placement, where all the 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index dbd8b8d714b..15ab24e2eb4 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1813,9 +1813,7 @@ static e_move_result try_swap(const t_annealing_state* state,
             }
             if (noc_opts.noc) {
                 commit_noc_costs();
-
-                costs->noc_aggregate_bandwidth_cost += noc_delta_c.aggregate_bandwidth_delta_c;
-                costs->noc_latency_cost += noc_delta_c.latency_delta_c;
+                *costs += noc_delta_c;
             }
 
             //Highlights the new block when manual move is selected.
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 75ff2d2bf12..8c30cbaf681 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -73,6 +73,19 @@ void t_placer_costs::update_norm_factors() {
     }
 }
 
+/**
+ * @brief Accumulates NoC cost difference terms
+ *
+ * @param noc_delta_cost NoC cost difference if the swap is accepted
+ */
+t_placer_costs& t_placer_costs::operator+=(const NocDeltaCost& noc_delta_cost) {
+    noc_aggregate_bandwidth_cost += noc_delta_cost.aggregate_bandwidth_delta_c;
+    noc_latency_cost += noc_delta_cost.latency_delta_c;
+    noc_congestion_cost += noc_delta_cost.congestion_delta_c;
+
+    return *this;
+}
+
 ///@brief Constructor: Initialize all annealing state variables and macros.
 t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
                                      float first_t,
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 23a1fedcf15..30820e44ae8 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -12,6 +12,12 @@
 #include "vtr_vector_map.h"
 #include "globals.h"
 
+struct NocDeltaCost {
+    double aggregate_bandwidth_delta_c = 0.0;
+    double latency_delta_c = 0.0;
+    double congestion_delta_c = 0.0;
+};
+
 /**
  * @brief Data structure that stores different cost values in the placer.
  *
@@ -34,6 +40,17 @@
  *   @param timing_cost_norm The normalization factor for the timing cost, which
  *              is upper-bounded by the value of MAX_INV_TIMING_COST.
  *
+ *   @param noc_aggregate_bandwidth_cost The aggregate NoC bandwidth cost
+ *   @param noc_aggregate_bandwidth_cost_norm The normalization factor for
+ *   the aggregate bandwidth cost
+ *   @param noc_latency_cost The NoC latency cost,
+ *   calculated as the sum of latencies experienced by each traffic flow
+ *   @param noc_latency_cost_norm The normalization factor for the latency cost
+ *   @param noc_congestion_cost The NoC congestion cost, i.e. how over-utilized
+ *   NoC links are
+ *   @param noc_congestion_cost_norm The normalization factor for the NoC
+ *   congestion cost
+ *
  *   @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity
  *              with very lax timing constraints, which avoids multiplying by a
  *              gigantic timing_cost_norm when auto-normalizing. The exact value
@@ -65,6 +82,7 @@ class t_placer_costs {
 
   public: //Mutator
     void update_norm_factors();
+    t_placer_costs& operator+=(const NocDeltaCost& noc_delta_cost);
 
   private:
     double MAX_INV_TIMING_COST = 1.e12;

From 8fcdda55be28ca2fa650556eacae68805ad536d6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 22 Jan 2024 19:02:34 -0500
Subject: [PATCH 08/41] Updated commit_noc_costs(),
 allocate_and_load_noc_placement_structs(), and free_noc_placement_structs()
 for NoC congestion costs

---
 vpr/src/place/noc_place_utils.cpp | 43 ++++++++++++++++++++++++++++---
 vpr/src/place/noc_place_utils.h   |  6 +++--
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 6595a82cfb9..67626a1d0c9 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -53,6 +53,7 @@ void initial_noc_routing(void) {
 }
 
 void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs) {
+    // used to access NoC links and modify them
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     // Zero out bandwidth usage for all links
@@ -103,28 +104,53 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         // get the current traffic flow info
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
+        // calculate the new aggregate bandwidth and latency costs for the affected traffic flow
+        // store them in case the proposed swap is reverted
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
         proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
 
+        // compute how much the aggregate bandwidth and latency costs change with this swap
         delta_c.aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
         delta_c.latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
     }
 
+    // Iterate over all affected links and calculate their new congestion cost and store it in case the swap is reverted
     for (const auto& link_id : affected_noc_links) {
+        // get the affected link
         const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
+
+        // calculate the new congestion cost for the link and store it for possible reversion
         proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
+
+        // compute how much the congestion cost changes with this swap
         delta_c.congestion_delta_c += proposed_link_congestion_costs[link] - link_congestion_costs[link];
     }
 }
 
 void commit_noc_costs() {
+    // used to access NoC links
+    auto& noc_ctx = g_vpr_ctx.mutable_noc();
+
+    // Iterate over all the traffic flows affected by the proposed router swap
     for (auto& traffic_flow_id : affected_traffic_flows) {
         // update the traffic flow costs
         traffic_flow_costs[traffic_flow_id] = proposed_traffic_flow_costs[traffic_flow_id];
 
         // reset the proposed traffic flows costs
-        proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = -1;
-        proposed_traffic_flow_costs[traffic_flow_id].latency = -1;
+        proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = INVALID_NOC_COST_TERM;
+        proposed_traffic_flow_costs[traffic_flow_id].latency = INVALID_NOC_COST_TERM;
+    }
+
+    // Iterate over all the NoC links whose bandwidth utilization was affected by the proposed move
+    for(auto link_id : affected_noc_links) {
+        // get the affected link
+        const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
+
+        // commit the new link congestion cost
+        link_congestion_costs[link] = proposed_link_congestion_costs[link];
+
+        // invalidate the proposed link congestion flow costs
+        proposed_link_congestion_costs[link] = INVALID_NOC_COST_TERM;
     }
 
     return;
@@ -521,8 +547,13 @@ void allocate_and_load_noc_placement_structs(void) {
 
     int number_of_traffic_flows = noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows();
 
-    traffic_flow_costs.resize(number_of_traffic_flows);
-    proposed_traffic_flow_costs.resize(number_of_traffic_flows);
+    traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM});
+    proposed_traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM});
+
+    int number_of_noc_links = noc_ctx.noc_model.get_number_of_noc_links();
+
+    link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM);
+    proposed_link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM);
 
     return;
 }
@@ -532,6 +563,10 @@ void free_noc_placement_structs(void) {
     vtr::release_memory(proposed_traffic_flow_costs);
     vtr::release_memory(affected_traffic_flows);
 
+    vtr::release_memory(link_congestion_costs);
+    vtr::release_memory(proposed_link_congestion_costs);
+    vtr::release_memory(affected_noc_links);
+
     return;
 }
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index eae3619730f..a29c5040144 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -27,6 +27,8 @@ constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3;
 // So this value represents the lowest possible latency cost.
 constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12;
 
+constexpr double INVALID_NOC_COST_TERM = -1.0;
+
 /**
  * @brief Each traffic flow cost consists of two components:
  *        1) traffic flow aggregate bandwidth (sum over all used links of the traffic flow bandwidth)
@@ -35,8 +37,8 @@ constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12;
  *        traffic flow cost.
  */
 struct TrafficFlowPlaceCost {
-    double aggregate_bandwidth = -1;
-    double latency = -1;
+    double aggregate_bandwidth = INVALID_NOC_COST_TERM;
+    double latency = INVALID_NOC_COST_TERM;
 };
 
 /**

From 394602967689fae57634baf7a2c3722883b0cc23 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 23 Jan 2024 14:49:16 -0500
Subject: [PATCH 09/41] Modified noc_place_utils.cpp to compute congestion cost

---
 vpr/src/noc/noc_routing_algorithm_creator.cpp |   2 +-
 vpr/src/noc/noc_routing_algorithm_creator.h   |   2 +-
 vpr/src/place/initial_noc_placement.cpp       |   4 +-
 vpr/src/place/noc_place_utils.cpp             | 104 +++++++++++-------
 vpr/src/place/noc_place_utils.h               |  12 +-
 vpr/src/place/place.cpp                       |  27 +++--
 vpr/src/place/place_util.cpp                  |   8 +-
 vpr/src/place/place_util.h                    |  10 +-
 vpr/test/test_noc_place_utils.cpp             |  45 ++++----
 9 files changed, 124 insertions(+), 90 deletions(-)

diff --git a/vpr/src/noc/noc_routing_algorithm_creator.cpp b/vpr/src/noc/noc_routing_algorithm_creator.cpp
index 0252f1fefca..65afcdc0a90 100644
--- a/vpr/src/noc/noc_routing_algorithm_creator.cpp
+++ b/vpr/src/noc/noc_routing_algorithm_creator.cpp
@@ -2,7 +2,7 @@
 #include "noc_routing_algorithm_creator.h"
 #include "vpr_error.h"
 
-NocRouting* NocRoutingAlgorithmCreator::create_routing_algorithm(std::string routing_algorithm_name) {
+NocRouting* NocRoutingAlgorithmCreator::create_routing_algorithm(const std::string& routing_algorithm_name) {
     NocRouting* noc_routing_algorithm = nullptr;
 
     if (routing_algorithm_name == "xy_routing") {
diff --git a/vpr/src/noc/noc_routing_algorithm_creator.h b/vpr/src/noc/noc_routing_algorithm_creator.h
index bca7b98abdc..b05d9f57981 100644
--- a/vpr/src/noc/noc_routing_algorithm_creator.h
+++ b/vpr/src/noc/noc_routing_algorithm_creator.h
@@ -39,7 +39,7 @@ class NocRoutingAlgorithmCreator {
      * NoC routing algorithm
      * @return NocRouting* A reference to the created NoC routing algorithm
      */
-    NocRouting* create_routing_algorithm(std::string routing_algorithm_name);
+    NocRouting* create_routing_algorithm(const std::string& routing_algorithm_name);
 };
 
 #endif
diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 1ee212eb473..79717d2c421 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -212,9 +212,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
         if (create_move_outcome != e_create_move::ABORT) {
             apply_move_blocks(blocks_affected);
 
-            NocDeltaCost noc_delta_c {0.0, 0.0, 0.0};
+            NocCostTerms noc_delta_c {0.0, 0.0, 0.0};
             find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
-            double delta_cost = (noc_opts.noc_placement_weighting) * (noc_delta_c.latency_delta_c * costs.noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm);
+            double delta_cost = (noc_opts.noc_placement_weighting) * (noc_delta_c.latency * costs.noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth * costs.noc_aggregate_bandwidth_cost_norm);
 
             double prob = starting_prob - i_move * prob_step;
             bool move_accepted = accept_noc_swap(delta_cost, prob);
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 67626a1d0c9..8d2858fdf95 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -43,7 +43,7 @@ void initial_noc_routing(void) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // update the traffic flow route based on where the router cluster blocks are placed
-        std::vector<NocLinkId>& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
+        std::vector<NocLinkId>& curr_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
 
         // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed
         update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
@@ -70,10 +70,10 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
     costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
 }
 
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocDeltaCost& delta_c, const t_noc_opts& noc_opts) {
-    VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth_delta_c == 0.);
-    VTR_ASSERT_SAFE(delta_c.latency_delta_c == 0.);
-    VTR_ASSERT_SAFE(delta_c.congestion_delta_c == 0.);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocCostTerms& delta_c, const t_noc_opts& noc_opts) {
+    VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.);
+    VTR_ASSERT_SAFE(delta_c.latency == 0.);
+    VTR_ASSERT_SAFE(delta_c.congestion == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
     NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
@@ -105,25 +105,24 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // calculate the new aggregate bandwidth and latency costs for the affected traffic flow
-        // store them in case the proposed swap is reverted
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
         proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
 
         // compute how much the aggregate bandwidth and latency costs change with this swap
-        delta_c.aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
-        delta_c.latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
+        delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
+        delta_c.latency += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
     }
 
-    // Iterate over all affected links and calculate their new congestion cost and store it in case the swap is reverted
+    // Iterate over all affected links and calculate their new congestion cost and store it
     for (const auto& link_id : affected_noc_links) {
         // get the affected link
         const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
 
-        // calculate the new congestion cost for the link and store it for possible reversion
+        // calculate the new congestion cost for the link and store it
         proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
 
         // compute how much the congestion cost changes with this swap
-        delta_c.congestion_delta_c += proposed_link_congestion_costs[link] - link_congestion_costs[link];
+        delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link];
     }
 }
 
@@ -156,7 +155,7 @@ void commit_noc_costs() {
     return;
 }
 
-std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router) {
+std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router) {
     // provides the positions where the affected blocks have moved to
     auto& place_ctx = g_vpr_ctx.placement();
 
@@ -198,8 +197,6 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc
     // get all the associated traffic flows for the logical router cluster block
     const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id);
 
-//    std::unordered_set<NocLinkId> prev_route_links, curr_route_links;
-
     // now check if there are any associated traffic flows
     if (assoc_traffic_flows != nullptr) {
         // There are traffic flows associated to the current router block so process them
@@ -256,7 +253,7 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
             const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(blk);
 
             // now check if there are any associated traffic flows
-            if (assoc_traffic_flows->size() != 0) {
+            if (assoc_traffic_flows != nullptr) {
                 // There are traffic flows associated to the current router block so process them
                 for (auto& traffic_flow_id : *assoc_traffic_flows) {
                     // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already.
@@ -288,21 +285,27 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& no
     update_traffic_flow_link_usage(curr_traffic_flow_route, noc_model, -1, curr_traffic_flow.traffic_flow_bandwidth);
 
     // now get the re-routed traffic flow route and increment all the link usages with this reverted route
-    std::vector<NocLinkId>& re_routed_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router);
+    std::vector<NocLinkId>& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router);
     update_traffic_flow_link_usage(re_routed_traffic_flow_route, noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
 
     return;
 }
 
-void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_noc_latency_cost) {
+void recompute_noc_costs(NocCostTerms& new_cost) {
+    auto& noc_ctx = g_vpr_ctx.noc();
+
     // reset the cost variables first
-    new_noc_aggregate_bandwidth_cost = 0;
-    new_noc_latency_cost = 0;
+    new_cost = NocCostTerms{0.0, 0.0, 0.0};
 
     // go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC
-    for (const auto& traffic_flow_id : g_vpr_ctx.noc().noc_traffic_flows_storage.get_all_traffic_flow_id()) {
-        new_noc_aggregate_bandwidth_cost += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
-        new_noc_latency_cost += traffic_flow_costs[traffic_flow_id].latency;
+    for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) {
+        new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
+        new_cost.latency += traffic_flow_costs[traffic_flow_id].latency;
+    }
+
+    // Iterate over all NoC links and accumulate their congestion costs
+    for (auto& link_id : noc_ctx.noc_model.get_noc_links()) {
+        new_cost.congestion += link_congestion_costs[link_id];
     }
 
     return;
@@ -403,17 +406,22 @@ double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
 
 int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
     int error = 0;
-    double noc_aggregate_bandwidth_cost_check = 0.;
-    double noc_latency_cost_check = 0.;
+    NocCostTerms cost_check{0.0, 0.0, 0.0};
 
     // get current router block locations
     auto& place_ctx = g_vpr_ctx.placement();
-    const vtr::vector_map<ClusterBlockId, t_block_loc>* placed_cluster_block_locations = &place_ctx.block_locs;
+    const vtr::vector_map<ClusterBlockId, t_block_loc>& placed_cluster_block_locations = place_ctx.block_locs;
 
     auto& noc_ctx = g_vpr_ctx.noc();
-    const NocStorage* noc_model = &noc_ctx.noc_model;
+    const NocStorage& noc_model = noc_ctx.noc_model;
     const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
+    // a copy of NoC link storage used to calculate link bandwidth utilization from scratch
+    vtr::vector<NocLinkId, NocLink> temp_noc_link_storage = noc_model.get_noc_links();
+
+    // reset bandwidth utilization for all links
+    std::for_each(temp_noc_link_storage.begin(), temp_noc_link_storage.end(), [](NocLink& link) {link.set_bandwidth_usage(0.0); });
+
     // need to create a temporary noc routing algorithm
     NocRoutingAlgorithmCreator routing_algorithm_factory;
     NocRouting* temp_noc_routing_algorithm = routing_algorithm_factory.create_routing_algorithm(noc_opts.noc_routing_algorithm);
@@ -431,41 +439,63 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id;
 
         // get the ids of the hard router blocks where the logical router cluster blocks have been placed
-        NocRouterId source_router_block_id = noc_model->get_router_at_grid_location((*placed_cluster_block_locations)[logical_source_router_block_id].loc);
-        NocRouterId sink_router_block_id = noc_model->get_router_at_grid_location((*placed_cluster_block_locations)[logical_sink_router_block_id].loc);
+        NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_source_router_block_id].loc);
+        NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_sink_router_block_id].loc);
 
         // route the current traffic flow
-        temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, temp_found_noc_route, *noc_model);
+        temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, temp_found_noc_route, noc_model);
 
         // now calculate the costs associated to the current traffic flow and accumulate it to find the total cost of the NoC placement
         double current_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(temp_found_noc_route, curr_traffic_flow);
-        noc_aggregate_bandwidth_cost_check += current_flow_aggregate_bandwidth_cost;
+        cost_check.aggregate_bandwidth += current_flow_aggregate_bandwidth_cost;
 
-        double current_flow_latency_cost = calculate_traffic_flow_latency_cost(temp_found_noc_route, *noc_model, curr_traffic_flow, noc_opts);
-        noc_latency_cost_check += current_flow_latency_cost;
+        double current_flow_latency_cost = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow, noc_opts);
+        cost_check.latency += current_flow_latency_cost;
+
+        // increase bandwidth utilization for the links that constitute the current flow's route
+        for (auto& link_id : temp_found_noc_route) {
+            auto& link = temp_noc_link_storage[link_id];
+            double curr_link_bw_util = link.get_bandwidth_usage();
+            link.set_bandwidth_usage(curr_link_bw_util + curr_traffic_flow.traffic_flow_bandwidth);
+            VTR_ASSERT(link.get_bandwidth_usage() >= 0.0);
+        }
 
         // clear the current traffic flow route, so we can route the next traffic flow
         temp_found_noc_route.clear();
     }
 
+    // Iterate over all NoC links and accumulate congestion cost
+    for(const auto& link : temp_noc_link_storage) {
+        cost_check.congestion += calculate_link_congestion_cost(link, noc_opts);
+    }
+
     // check whether the aggregate bandwidth placement cost is within the error tolerance
-    if (fabs(noc_aggregate_bandwidth_cost_check - costs.noc_aggregate_bandwidth_cost) > costs.noc_aggregate_bandwidth_cost * error_tolerance) {
+    if (fabs(cost_check.aggregate_bandwidth - costs.noc_aggregate_bandwidth_cost) > costs.noc_aggregate_bandwidth_cost * error_tolerance) {
         VTR_LOG_ERROR(
             "noc_aggregate_bandwidth_cost_check: %g and noc_aggregate_bandwidth_cost: %g differ in check_noc_placement_costs.\n",
-            noc_aggregate_bandwidth_cost_check, costs.noc_aggregate_bandwidth_cost);
+            cost_check.aggregate_bandwidth, costs.noc_aggregate_bandwidth_cost);
         error++;
     }
 
     // only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
-    if (noc_latency_cost_check > MIN_EXPECTED_NOC_LATENCY_COST) {
+    if (cost_check.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
         // check whether the latency placement cost is within the error tolerance
-        if (fabs(noc_latency_cost_check - costs.noc_latency_cost) > costs.noc_latency_cost * error_tolerance) {
+        if (fabs(cost_check.latency - costs.noc_latency_cost) > costs.noc_latency_cost * error_tolerance) {
             VTR_LOG_ERROR(
                 "noc_latency_cost_check: %g and noc_latency_cost: %g differ in check_noc_placement_costs.\n",
-                noc_latency_cost_check, costs.noc_latency_cost);
+                cost_check.latency, costs.noc_latency_cost);
             error++;
         }
     }
+
+    // check whether the NoC congestion cost is within the error tolerance
+    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > costs.noc_congestion_cost * error_tolerance) {
+        VTR_LOG_ERROR(
+            "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
+            cost_check.congestion, costs.noc_congestion_cost);
+        error++;
+    }
+
     // delete the temporary routing algorithm
     delete temp_noc_routing_algorithm;
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index a29c5040144..9056caa64e5 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -105,7 +105,7 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
  * NoC latency cost caused by a placer move is stored
  * here.
  */
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocDeltaCost& delta_c, const t_noc_opts& noc_opts);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocCostTerms& delta_c, const t_noc_opts& noc_opts);
 
 /**
  * @brief Updates static datastructures found in 'noc_place_utils.cpp'
@@ -136,6 +136,12 @@ void commit_noc_costs();
  * First, the hard routers blocks that represent the placed location of
  * the router cluster blocks are identified. Then the traffic flow
  * is routed and updated.
+ *
+ * Note that this function does not update the link bandwidth utilization.
+ * update_traffic_flow_link_usage() should be called after this function
+ * to update the link utilization for the new route. If the flow is re-routed
+ * because either its source or its destination is moved, update_traffic_flow_link_usage()
+ * should first be used to reduce the bandwidth utilization of the old route.
  * 
  * @param traffic_flow_id Represents the traffic flow that needs to be routed
  * @param noc_model Contains all the links and routers within the NoC. Used
@@ -146,7 +152,7 @@ void commit_noc_costs();
  * flows within the NoC.
  * @return std::vector<NocLinkId>& The found route for the traffic flow.
  */
-std::vector<NocLinkId>& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router);
+std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router);
 
 /**
  * @brief Updates the bandwidth usages of links found in a routed traffic flow.
@@ -255,7 +261,7 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& no
  * @param new_noc_latency_cost Will store the newly computed
  * NoC latency cost for the current placement state.
  */
-void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_noc_latency_cost);
+void recompute_noc_costs(NocCostTerms& new_cost);
 
 /**
  * @brief Updates all the cost normalization factors relevant to the NoC.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 15ab24e2eb4..7f86947d56e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1388,35 +1388,34 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
     }
 
     if (noc_opts.noc) {
-        double new_noc_aggregate_bandwidth_cost = 0.;
-        double new_noc_latency_cost = 0.;
-        recompute_noc_costs(new_noc_aggregate_bandwidth_cost, new_noc_latency_cost);
+        NocCostTerms new_noc_cost{0.0, 0.0, 0.0};
+        recompute_noc_costs(new_noc_cost);
 
         if (fabs(
-                new_noc_aggregate_bandwidth_cost
+                new_noc_cost.aggregate_bandwidth
                 - costs->noc_aggregate_bandwidth_cost)
             > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) {
             std::string msg = vtr::string_fmt(
-                "in recompute_costs_from_scratch: new_noc_aggregate_bandwidth_cost = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n",
-                new_noc_aggregate_bandwidth_cost, costs->noc_aggregate_bandwidth_cost, ERROR_TOL);
+                "in recompute_costs_from_scratch: new_noc_cost.aggregate_bandwidth = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n",
+                new_noc_cost.aggregate_bandwidth, costs->noc_aggregate_bandwidth_cost, ERROR_TOL);
             VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
         }
-        costs->noc_aggregate_bandwidth_cost = new_noc_aggregate_bandwidth_cost;
+        costs->noc_aggregate_bandwidth_cost = new_noc_cost.aggregate_bandwidth;
 
         // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond.
         // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond)
-        if (new_noc_latency_cost > MIN_EXPECTED_NOC_LATENCY_COST) {
+        if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
             if (fabs(
-                    new_noc_latency_cost
+                    new_noc_cost.latency
                     - costs->noc_latency_cost)
                 > costs->noc_latency_cost * ERROR_TOL) {
                 std::string msg = vtr::string_fmt(
-                    "in recompute_costs_from_scratch: new_noc_latency_cost = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n",
-                    new_noc_latency_cost, costs->noc_latency_cost, ERROR_TOL);
+                    "in recompute_costs_from_scratch: new_noc_cost.latency = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n",
+                    new_noc_cost.latency, costs->noc_latency_cost, ERROR_TOL);
                 VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
             }
         }
-        costs->noc_latency_cost = new_noc_latency_cost;
+        costs->noc_latency_cost = new_noc_cost.latency;
     }
 }
 
@@ -1755,13 +1754,13 @@ static e_move_result try_swap(const t_annealing_state* state,
         }
 
 
-        NocDeltaCost noc_delta_c {0.0, 0.0, 0.0}; // change in NoC cost
+        NocCostTerms noc_delta_c {0.0, 0.0, 0.0}; // change in NoC cost
         /* Update the NoC datastructure and costs*/
         if (noc_opts.noc) {
             find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
 
             // Include the NoC delta costs in the total cost change for this swap
-            delta_c = delta_c + noc_placement_weighting * (noc_delta_c.latency_delta_c * costs->noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth_delta_c * costs->noc_aggregate_bandwidth_cost_norm);
+            delta_c = delta_c + noc_placement_weighting * (noc_delta_c.latency * costs->noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth * costs->noc_aggregate_bandwidth_cost_norm);
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 8c30cbaf681..3e63f1e9881 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -78,10 +78,10 @@ void t_placer_costs::update_norm_factors() {
  *
  * @param noc_delta_cost NoC cost difference if the swap is accepted
  */
-t_placer_costs& t_placer_costs::operator+=(const NocDeltaCost& noc_delta_cost) {
-    noc_aggregate_bandwidth_cost += noc_delta_cost.aggregate_bandwidth_delta_c;
-    noc_latency_cost += noc_delta_cost.latency_delta_c;
-    noc_congestion_cost += noc_delta_cost.congestion_delta_c;
+t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
+    noc_aggregate_bandwidth_cost += noc_delta_cost.aggregate_bandwidth;
+    noc_latency_cost += noc_delta_cost.latency;
+    noc_congestion_cost += noc_delta_cost.congestion;
 
     return *this;
 }
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 30820e44ae8..26339702f76 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -12,10 +12,10 @@
 #include "vtr_vector_map.h"
 #include "globals.h"
 
-struct NocDeltaCost {
-    double aggregate_bandwidth_delta_c = 0.0;
-    double latency_delta_c = 0.0;
-    double congestion_delta_c = 0.0;
+struct NocCostTerms {
+    double aggregate_bandwidth = 0.0;
+    double latency = 0.0;
+    double congestion = 0.0;
 };
 
 /**
@@ -82,7 +82,7 @@ class t_placer_costs {
 
   public: //Mutator
     void update_norm_factors();
-    t_placer_costs& operator+=(const NocDeltaCost& noc_delta_cost);
+    t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost);
 
   private:
     double MAX_INV_TIMING_COST = 1.e12;
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 7a3d90eb1d9..bb5041fcb29 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -597,8 +597,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     golden_traffic_flow_latency_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
 
     // stores the change in bandwidth and latency costs from the test function
-    double test_noc_bandwidth_costs = 0;
-    double test_noc_latency_costs = 0;
+    NocCostTerms test_noc_costs{0.0, 0.0, 0.0};
 
     // we need to route all the traffic flows based on their initial positions
     for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) {
@@ -640,8 +639,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
         golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] *= curr_traffic_flow.traffic_flow_priority;
 
-        test_noc_bandwidth_costs += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number];
-        test_noc_latency_costs += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
+        test_noc_costs.aggregate_bandwidth += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number];
+        test_noc_costs.latency += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
     }
 
     // initialize noc placement structs
@@ -772,14 +771,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
             }
         }
 
-        NocDeltaCost delta_cost {0.0, 0.0, 0.0};
+        NocCostTerms delta_cost {0.0, 0.0, 0.0};
 
         // call the test function
         find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
         // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-        test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
-        test_noc_latency_costs += delta_cost.latency_delta_c;
+        test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
+        test_noc_costs.latency += delta_cost.latency;
 
         // need this function to update the local datastructures that store all the traffic flow costs
         commit_noc_costs();
@@ -902,14 +901,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
     }
 
-    NocDeltaCost delta_cost {0.0, 0.0, 0.0};
+    NocCostTerms delta_cost {0.0, 0.0, 0.0};
 
     // call the test function
     find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
-    test_noc_latency_costs += delta_cost.latency_delta_c;
+    test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
+    test_noc_costs.latency += delta_cost.latency;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -993,14 +992,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     }
 
     // reset the delta costs
-    delta_cost = NocDeltaCost {0.0, 0.0, 0.0};
+    delta_cost = NocCostTerms {0.0, 0.0, 0.0};
 
     // call the test function
     find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
-    test_noc_latency_costs += delta_cost.latency_delta_c;
+    test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
+    test_noc_costs.latency += delta_cost.latency;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1056,14 +1055,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows //
 
     // reset the delta costs
-    delta_cost = NocDeltaCost {0.0, 0.0, 0.0};
+    delta_cost = NocCostTerms {0.0, 0.0, 0.0};
 
     // call the test function
     find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
-    test_noc_bandwidth_costs += delta_cost.aggregate_bandwidth_delta_c;
-    test_noc_latency_costs += delta_cost.latency_delta_c;
+    test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
+    test_noc_costs.latency += delta_cost.latency;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1090,22 +1089,22 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     }
 
     // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference)
-    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_latency_costs));
-    REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_bandwidth_costs));
+    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
+    REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
 
     // now test the recompute cost function //
     // The recompute cost function just adds up all traffic flow costs, so it match the expected noc costs that we manually calculated above by summing up all the expected individual traffic flow costs. //
 
     // start by resetting the test cost variables
-    test_noc_bandwidth_costs = 0.;
-    test_noc_latency_costs = 0.;
+    test_noc_costs.aggregate_bandwidth = 0.;
+    test_noc_costs.latency = 0.;
 
     // now execute the test function
-    recompute_noc_costs(test_noc_bandwidth_costs, test_noc_latency_costs);
+    recompute_noc_costs(test_noc_costs);
 
     // now verify
-    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_latency_costs));
-    REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_bandwidth_costs));
+    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
+    REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
 
     // delete local datastructures
     free_noc_placement_structs();

From 37a739e9eb006f120c3a1317b06631ec5e4e3174 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 23 Jan 2024 15:14:41 -0500
Subject: [PATCH 10/41] Use std::unique_ptr to hold the pointer to the routing
 algorithm.

---
 vpr/src/base/vpr_api.cpp                      |  7 ++----
 vpr/src/base/vpr_context.h                    |  2 +-
 vpr/src/noc/noc_routing_algorithm_creator.cpp |  8 +++----
 vpr/src/noc/noc_routing_algorithm_creator.h   |  3 ++-
 vpr/src/place/noc_place_utils.cpp             |  6 +----
 vpr/test/test_noc_place_utils.cpp             | 24 ++++++-------------
 6 files changed, 17 insertions(+), 33 deletions(-)

diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 1e4684ae683..cc5d23343de 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -564,7 +564,7 @@ void vpr_setup_noc_routing_algorithm(std::string noc_routing_algorithm_name) {
     // newly created routing algorithm to it
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
-    noc_ctx.noc_flows_router = NocRoutingAlgorithmCreator().create_routing_algorithm(noc_routing_algorithm_name);
+    noc_ctx.noc_flows_router = NocRoutingAlgorithmCreator::create_routing_algorithm(noc_routing_algorithm_name);
     return;
 }
 
@@ -1208,10 +1208,7 @@ static void free_routing() {
 /**
  * @brief handles the deletion of NoC related datastructures.
  */
-static void free_noc() {
-    auto& noc_ctx = g_vpr_ctx.mutable_noc();
-    delete noc_ctx.noc_flows_router;
-}
+static void free_noc() {}
 
 void vpr_free_vpr_data_structures(t_arch& Arch,
                                   t_vpr_setup& vpr_setup) {
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 6a07f367e13..18420590f2e 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -546,7 +546,7 @@ struct NocContext : public Context {
      *
      * This is created from a user supplied command line option "--noc_routing_algorithm"
      */
-    NocRouting* noc_flows_router;
+    std::unique_ptr<NocRouting> noc_flows_router;
 };
 
 /**
diff --git a/vpr/src/noc/noc_routing_algorithm_creator.cpp b/vpr/src/noc/noc_routing_algorithm_creator.cpp
index 65afcdc0a90..ddbd0ebb9d7 100644
--- a/vpr/src/noc/noc_routing_algorithm_creator.cpp
+++ b/vpr/src/noc/noc_routing_algorithm_creator.cpp
@@ -2,13 +2,13 @@
 #include "noc_routing_algorithm_creator.h"
 #include "vpr_error.h"
 
-NocRouting* NocRoutingAlgorithmCreator::create_routing_algorithm(const std::string& routing_algorithm_name) {
-    NocRouting* noc_routing_algorithm = nullptr;
+std::unique_ptr<NocRouting> NocRoutingAlgorithmCreator::create_routing_algorithm(const std::string& routing_algorithm_name) {
+    std::unique_ptr<NocRouting> noc_routing_algorithm;
 
     if (routing_algorithm_name == "xy_routing") {
-        noc_routing_algorithm = new XYRouting();
+        noc_routing_algorithm = std::make_unique<XYRouting>();
     } else if (routing_algorithm_name == "bfs_routing") {
-        noc_routing_algorithm = new BFSRouting();
+        noc_routing_algorithm = std::make_unique<BFSRouting>();
     } else {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "The provided NoC routing algorithm '%s' is not supported.", routing_algorithm_name.c_str());
     }
diff --git a/vpr/src/noc/noc_routing_algorithm_creator.h b/vpr/src/noc/noc_routing_algorithm_creator.h
index b05d9f57981..b4361d95d33 100644
--- a/vpr/src/noc/noc_routing_algorithm_creator.h
+++ b/vpr/src/noc/noc_routing_algorithm_creator.h
@@ -18,6 +18,7 @@
  */
 
 #include <string>
+#include <memory>
 
 #include "noc_routing.h"
 #include "xy_routing.h"
@@ -39,7 +40,7 @@ class NocRoutingAlgorithmCreator {
      * NoC routing algorithm
      * @return NocRouting* A reference to the created NoC routing algorithm
      */
-    NocRouting* create_routing_algorithm(const std::string& routing_algorithm_name);
+    static std::unique_ptr<NocRouting> create_routing_algorithm(const std::string& routing_algorithm_name);
 };
 
 #endif
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 8d2858fdf95..25bba5a4867 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -423,8 +423,7 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
     std::for_each(temp_noc_link_storage.begin(), temp_noc_link_storage.end(), [](NocLink& link) {link.set_bandwidth_usage(0.0); });
 
     // need to create a temporary noc routing algorithm
-    NocRoutingAlgorithmCreator routing_algorithm_factory;
-    NocRouting* temp_noc_routing_algorithm = routing_algorithm_factory.create_routing_algorithm(noc_opts.noc_routing_algorithm);
+    std::unique_ptr<NocRouting> temp_noc_routing_algorithm = NocRoutingAlgorithmCreator::create_routing_algorithm(noc_opts.noc_routing_algorithm);
 
     // stores a temporarily found route for a traffic flow
     std::vector<NocLinkId> temp_found_noc_route;
@@ -496,9 +495,6 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         error++;
     }
 
-    // delete the temporary routing algorithm
-    delete temp_noc_routing_algorithm;
-
     return error;
 }
 
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index bb5041fcb29..0e5de07283e 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -33,7 +33,6 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
 
     // store the reference to device grid with
@@ -142,8 +141,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
 
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
     NocRouting* routing_algorithm = new XYRouting();
@@ -213,7 +211,6 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
 
     // store the reference to device grid with
@@ -330,8 +327,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
 
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
     NocRouting* routing_algorithm = new XYRouting();
@@ -455,7 +451,6 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
 
     // store the reference to device grid with
@@ -581,8 +576,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
     NocRouting* routing_algorithm = new XYRouting();
@@ -1089,8 +1083,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     }
 
     // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference)
-    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
+    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
 
     // now test the recompute cost function //
     // The recompute cost function just adds up all traffic flow costs, so it match the expected noc costs that we manually calculated above by summing up all the expected individual traffic flow costs. //
@@ -1103,8 +1097,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     recompute_noc_costs(test_noc_costs);
 
     // now verify
-    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
+    REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
 
     // delete local datastructures
     free_noc_placement_structs();
@@ -1193,7 +1187,6 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
 
     // store the reference to device grid with
@@ -1315,8 +1308,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
 
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
     NocRouting* routing_algorithm = new XYRouting();
@@ -1518,7 +1510,6 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
 
     // store the reference to device grid with
@@ -1648,8 +1639,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
 
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
     NocRouting* routing_algorithm = new XYRouting();

From 7a01effe92058e8baf4ab3cace716e9d0e1f2ec3 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 23 Jan 2024 15:26:18 -0500
Subject: [PATCH 11/41] Add calculate_noc_cost()

---
 vpr/src/place/initial_noc_placement.cpp |  4 ++--
 vpr/src/place/noc_place_utils.cpp       | 22 ++++++++++++++++------
 vpr/src/place/noc_place_utils.h         | 13 ++-----------
 vpr/src/place/place.cpp                 |  7 +++----
 vpr/src/place/place_util.cpp            | 16 +++++++++++++---
 vpr/src/place/place_util.h              |  9 +++++++++
 6 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 79717d2c421..d0aaa573bf7 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -162,7 +162,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
     costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
     update_noc_normalization_factors(costs);
-    costs.cost = calculate_noc_cost(costs, noc_opts);
+    costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
 
     // Maximum distance in each direction that a router can travel in a move
     // It is assumed that NoC routers are organized in a square grid.
@@ -214,7 +214,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
 
             NocCostTerms noc_delta_c {0.0, 0.0, 0.0};
             find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
-            double delta_cost = (noc_opts.noc_placement_weighting) * (noc_delta_c.latency * costs.noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth * costs.noc_aggregate_bandwidth_cost_norm);
+            double delta_cost = calculate_noc_cost(noc_delta_c, costs, noc_opts);
 
             double prob = starting_prob - i_move * prob_step;
             bool move_accepted = accept_noc_swap(delta_cost, prob);
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 25bba5a4867..7e482bf6a09 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -326,12 +326,6 @@ void update_noc_normalization_factors(t_placer_costs& costs) {
     return;
 }
 
-double calculate_noc_cost(const t_placer_costs& costs, const t_noc_opts& noc_opts) {
-    double noc_cost;
-    noc_cost = (noc_opts.noc_placement_weighting) * ((costs.noc_aggregate_bandwidth_cost * costs.noc_aggregate_bandwidth_cost_norm) + (costs.noc_latency_cost * costs.noc_latency_cost_norm));
-    return noc_cost;
-}
-
 double comp_noc_aggregate_bandwidth_cost(void) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.noc();
@@ -534,6 +528,22 @@ double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc
     return congestion_cost;
 }
 
+double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts) {
+    double cost = 0.0;
+
+    /* NoC's contribution to the placement cost is a weighted sum over:
+     * 1) Traffic flow latency costs
+     * 2) Traffic flow aggregate bandwidth costs
+     * 3) Link congestion costs
+     */
+    cost = noc_opts.noc_placement_weighting * (
+               cost_terms.latency * norm_factors.noc_latency_cost_norm +
+               cost_terms.aggregate_bandwidth * norm_factors.noc_aggregate_bandwidth_cost_norm +
+               cost_terms.congestion * norm_factors.noc_congestion_cost_norm);
+
+    return cost;
+}
+
 int get_number_of_traffic_flows_with_latency_cons_met(void) {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index 9056caa64e5..d1e39bd32f7 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -275,17 +275,6 @@ void recompute_noc_costs(NocCostTerms& new_cost);
  */
 void update_noc_normalization_factors(t_placer_costs& costs);
 
-/**
- * @brief Calculates total NoC cost.
- *
- *  @param costs Contains latency and aggregate bandwidth costs
- *  along with their corresponding normalization factors.
- *  @param noc_opts Contains NoC placement weighting factor.
- *
- * @return Calculated total NoC cost.
- */
-double calculate_noc_cost(const t_placer_costs& costs, const t_noc_opts& noc_opts);
-
 /**
  * @brief Calculates the aggregate bandwidth of each traffic flow in the NoC
  * and initializes local variables that keep track of the traffic flow 
@@ -384,6 +373,8 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
 
 double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts);
 
+double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts);
+
 /**
  * @brief Goes through all the traffic flows and determines whether the
  * latency constraints have been met for each traffic flow. 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 7f86947d56e..b5d256d3d6b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1605,7 +1605,6 @@ static e_move_result try_swap(const t_annealing_state* state,
 
     float rlim_escape_fraction = placer_opts.rlim_escape_fraction;
     float timing_tradeoff = placer_opts.timing_tradeoff;
-    double noc_placement_weighting = noc_opts.noc_placement_weighting;
 
     PlaceCritParams crit_params;
     crit_params.crit_exponent = state->crit_exponent;
@@ -1760,7 +1759,7 @@ static e_move_result try_swap(const t_annealing_state* state,
             find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
 
             // Include the NoC delta costs in the total cost change for this swap
-            delta_c = delta_c + noc_placement_weighting * (noc_delta_c.latency * costs->noc_latency_cost_norm + noc_delta_c.aggregate_bandwidth * costs->noc_aggregate_bandwidth_cost_norm);
+            delta_c += calculate_noc_cost(noc_delta_c, *costs, noc_opts);
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
@@ -2280,8 +2279,8 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
     }
 
     if (noc_opts.noc) {
-        // in noc mode we include noc aggregate bandwidth and noc latency
-        total_cost += calculate_noc_cost(*costs, noc_opts);
+        // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion
+        total_cost += calculate_noc_cost(NocCostTerms(*costs), *costs, noc_opts);
     }
 
     return total_cost;
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 3e63f1e9881..2b9d38e9438 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -171,7 +171,7 @@ bool t_annealing_state::outer_loop_update(float success_rate,
                                           const t_annealing_sched& annealing_sched) {
 #ifndef NO_GRAPHICS
     t_draw_state* draw_state = get_draw_state_vars();
-    if (draw_state->list_of_breakpoints.size() != 0) {
+    if (!draw_state->list_of_breakpoints.empty()) {
         /* Update temperature in the current information variable. */
         get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
     }
@@ -384,7 +384,7 @@ void zero_initialize_grid_blocks() {
                 place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0);
                 auto tile = device_ctx.grid.get_physical_type({i, j, layer_num});
 
-                for (auto sub_tile : tile->sub_tiles) {
+                for (const auto& sub_tile : tile->sub_tiles) {
                     auto capacity = sub_tile.capacity;
 
                     for (int k = 0; k < capacity.total(); k++) {
@@ -517,7 +517,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_
 
         /*
          * analytical placement approach do not need to make sure whether location could accommodate more blocks
-         * since overused locations will be spreaded by legalizer afterward.
+         * since overused locations will be spread by legalizer afterward.
          * floorplan constraint is not supported by analytical placement yet, 
          * hence, if macro_can_be_placed is called from analytical placer, no further actions are required. 
          */
@@ -560,3 +560,13 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_
 
     return (mac_can_be_placed);
 }
+
+NocCostTerms::NocCostTerms(const t_placer_costs& costs)
+    : aggregate_bandwidth(costs.noc_aggregate_bandwidth_cost)
+    , latency(costs.noc_latency_cost)
+    , congestion(costs.noc_congestion_cost) {}
+
+NocCostTerms::NocCostTerms(double agg_bw, double lat, double congest)
+    : aggregate_bandwidth(agg_bw)
+    , latency(lat)
+    , congestion(congest) {}
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 26339702f76..b70c84c3643 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -12,7 +12,16 @@
 #include "vtr_vector_map.h"
 #include "globals.h"
 
+// forward declaration of t_placer_costs so that it can be used as an argument
+// in NocCostTerms constructor
+class t_placer_costs;
+
 struct NocCostTerms {
+  public:
+    NocCostTerms() = delete;
+    explicit NocCostTerms(const t_placer_costs& costs);
+    NocCostTerms(double agg_bw, double lat, double congest);
+
     double aggregate_bandwidth = 0.0;
     double latency = 0.0;
     double congestion = 0.0;

From e43ef3d08a49637dc62d0c08dc762ec840c31dd5 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 23 Jan 2024 15:36:53 -0500
Subject: [PATCH 12/41] Add --noc_congestion_weighting command line option

---
 vpr/src/base/SetupVPR.cpp     | 1 +
 vpr/src/base/ShowSetup.cpp    | 1 +
 vpr/src/base/read_options.cpp | 7 +++++++
 vpr/src/base/read_options.h   | 1 +
 vpr/src/base/vpr_types.h      | 2 +-
 5 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index eecec4d39ce..a93b648f87b 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -737,6 +737,7 @@ static void SetupNocOpts(const t_options& Options, t_noc_opts* NocOpts) {
     NocOpts->noc_placement_weighting = Options.noc_placement_weighting;
     NocOpts->noc_latency_constraints_weighting = Options.noc_latency_constraints_weighting;
     NocOpts->noc_latency_weighting = Options.noc_latency_weighting;
+    NocOpts->noc_congestion_weighting = Options.noc_congestion_weighting;
     NocOpts->noc_swap_percentage = Options.noc_swap_percentage;
     NocOpts->noc_placement_file_name = Options.noc_placement_file_name;
 
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index 61f1bf772c3..92935c6c8fe 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -788,6 +788,7 @@ static void ShowNocOpts(const t_noc_opts& NocOpts) {
     VTR_LOG("NocOpts.noc_placement_weighting: %f\n", NocOpts.noc_placement_weighting);
     VTR_LOG("NocOpts.noc_latency_constraints_weighting: %f\n", NocOpts.noc_latency_constraints_weighting);
     VTR_LOG("NocOpts.noc_latency_weighting: %f\n", NocOpts.noc_latency_weighting);
+    VTR_LOG("NocOpts.noc_congestion_weighting: %f\n", NocOpts.noc_congestion_weighting);
     VTR_LOG("NocOpts.noc_swap_percentage: %d%%\n", NocOpts.noc_swap_percentage);
     VTR_LOG("NocOpts.noc_routing_algorithm: %s\n", NocOpts.noc_placement_file_name.c_str());
     VTR_LOG("\n");
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index d62b812e4b8..971f92b5071 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2819,6 +2819,13 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .default_value("0.05")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    noc_grp.add_argument<double>(args.noc_congestion_weighting, "--noc_congestion_weighting")
+        .help(
+            "Controls the importance of reducing the congestion of the NoC links."
+            "This value can be >=0, where 0 would mean the congestion has no relevance to placement, a value of 1 would mean the congestion is weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the link congestions.")
+        .default_value("0.05")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     noc_grp.add_argument<double>(args.noc_swap_percentage, "--noc_swap_percentage")
         .help(
             "Sets the minimum fraction of swaps attempted by the placer that are NoC blocks."
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index d1edc5ef2b2..e6476ba151e 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -155,6 +155,7 @@ struct t_options {
     argparse::ArgValue<double> noc_placement_weighting;
     argparse::ArgValue<double> noc_latency_constraints_weighting;
     argparse::ArgValue<double> noc_latency_weighting;
+    argparse::ArgValue<double> noc_congestion_weighting;
     argparse::ArgValue<double> noc_swap_percentage;
     argparse::ArgValue<std::string> noc_placement_file_name;
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 2965dc30856..b57caa24999 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1498,7 +1498,7 @@ struct t_noc_opts {
     double noc_placement_weighting;           ///<controls the significance of the NoC placement cost relative to the total placement cost range:[0-inf)
     double noc_latency_constraints_weighting; ///<controls the significance of meeting the traffic flow contraints range:[0-inf)
     double noc_latency_weighting;             ///<controls the significance of the traffic flow latencies relative to the other NoC placement costs range:[0-inf)
-    double noc_congestion_weighting;           ///<controls the significance of the link congestions relative to the other NoC placement costs range:[0-inf)
+    double noc_congestion_weighting;          ///<controls the significance of the link congestions relative to the other NoC placement costs range:[0-inf)
     int noc_swap_percentage;                  ///<controls the number of NoC router block swap attemps relative to the total number of swaps attempted by the placer range:[0-100]
     std::string noc_placement_file_name;      ///<is the name of the output file that contains the NoC placement information
 };

From b2ec184d4432dd9a9cf65675f20169fd2a3562a6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 25 Jan 2024 18:42:56 -0500
Subject: [PATCH 13/41] Compute and print NoC congestion metrics.

Added some comments to noc_link.h to explain what each method does.
---
 vpr/src/noc/noc_link.h            | 52 ++++++++++++++++++++++++-------
 vpr/src/noc/noc_storage.cpp       |  6 ++++
 vpr/src/place/noc_place_utils.cpp | 34 ++++++++++++++++++++
 vpr/src/place/noc_place_utils.h   | 16 +++++++---
 vpr/src/place/place.cpp           | 45 +++++++++++++++++++++++---
 5 files changed, 134 insertions(+), 19 deletions(-)

diff --git a/vpr/src/noc/noc_link.h b/vpr/src/noc/noc_link.h
index 244fe7b6959..2aa5d55cd67 100644
--- a/vpr/src/noc/noc_link.h
+++ b/vpr/src/noc/noc_link.h
@@ -73,10 +73,36 @@ class NocLink {
 
     /**
      * @brief Provides the size of the data (bandwidth) being currently transmitted using the link.
-     * @return A numeric value of the bandwidth of the link
+     * @return A numeric value of the bandwidth usage of the link
      */
     double get_bandwidth_usage(void) const;
 
+    /**
+     * @brief Returns the maximum bandwidth that the link can carry without congestion.
+     * @return A numeric value of the bandwidth capacity of the link
+     */
+    double get_bandwidth(void) const;
+
+    /**
+     * @brief Calculates the extent to which the current bandwidth utilization
+     * exceeds the link capacity. Any positive value means the link is congested.
+     * @return A numeric value of the bandwidth over-utilization in the link
+     */
+    double get_congested_bandwidth(void) const;
+
+    /**
+     * @brief Computes the congested bandwidth to bandwidth capacity ratio.
+     * @return The ratio of the congested bandwidth to the bandwidth capacity of the link.
+     */
+    double get_congested_bandwidth_ratio() const;
+
+    /**
+     * @brief Returns the unique link ID. The ID can be used to index
+     * vtr::vector<NoCLinkId, ...> instances.
+     * @return The unique ID for the link
+     */
+    NocLinkId get_link_id() const;
+
     // setters
     /**
      * @brief Can be used to set the source router of the link to a different router. 
@@ -93,24 +119,28 @@ class NocLink {
     void set_sink_router(NocRouterId sink);
 
     /**
-     * @brief Can modify the bandwidth of the link. It is expected that when the NoC is being placed
+     * @brief Can modify the bandwidth usage of the link. It is expected that when the NoC is being placed
      * the traffic flows will be re-routed multiple times. So the links will end up being used and un-used
      * by different traffic flows and the bandwidths of the links will correspondingly change. This function
      * can be used to make those changes
-     * @param new_bandwidth_usage The new value of the bandwidth of the link
+     * @param new_bandwidth_usage The new value of the bandwidth usage of the link
      */
     void set_bandwidth_usage(double new_bandwidth_usage);
 
+    /**
+     * @brief Sets the bandwidth capacity of the link. This function should be used when
+     * global NoC data structures are created and populated. The bandwidth capacity is used
+     * along with bandwidth_usage to measure congestion.
+     * @param new_bandwidth The new value of the bandwidth of the link
+     */
     void set_bandwidth(double new_bandwidth);
 
-    double get_bandwidth() const;
-
-    double get_congested_bandwidth() const;
-
-    double get_congested_bandwidth_ratio() const;
-
-    NocLinkId get_link_id() const;
-
+    
+    /**
+     * @brief Returns the unique link ID. The ID can be used to index
+     * vtr::vector<NoCLinkId, ...> instances.
+     * @return The unique ID for the link
+     */
     operator NocLinkId() const;
 };
 
diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index af22454765d..8438838c1f9 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -128,6 +128,12 @@ void NocStorage::add_link(NocRouterId source, NocRouterId sink) {
 
 void NocStorage::set_noc_link_bandwidth(double link_bandwidth) {
     noc_link_bandwidth = link_bandwidth;
+
+    // Iterate over all links and set their bandwidth
+    for (auto& link : link_storage) {
+        link.set_bandwidth(noc_link_bandwidth);
+    }
+
     return;
 }
 
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 7e482bf6a09..d77d29c0455 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -578,6 +578,40 @@ int get_number_of_traffic_flows_with_latency_cons_met(void) {
     return count_of_achieved_latency_cons;
 }
 
+int get_number_of_congested_noc_links(void) {
+    // get NoC links
+    auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links();
+
+    int num_congested_links = 0;
+
+    // Iterate over all NoC links and count the congested ones
+    for (const auto& link : noc_links) {
+      double congested_bw_ratio = link.get_congested_bandwidth_ratio();
+
+      if (congested_bw_ratio > MIN_EXPECTED_NOC_CONGESTION_COST) {
+            num_congested_links++;
+      }
+    }
+
+    return num_congested_links;
+}
+
+std::vector<NocLink> get_top_n_congested_links(int n) {
+    // get NoC links
+    vtr::vector<NocLinkId, NocLink> noc_links = g_vpr_ctx.noc().noc_model.get_noc_links();
+
+    // Sort links based on their congested bandwidth ratio in descending order
+    // stable_sort is used to make sure the order is the same across different machines/compilers
+    // Note that when the vector is sorted, indexing it with NocLinkId does not return the corresponding link
+    std::stable_sort(noc_links.begin(), noc_links.end(), [](const NocLink& l1, const NocLink& l2) {
+                         return l1.get_congested_bandwidth_ratio() > l2.get_congested_bandwidth_ratio();
+                     });
+
+    int pick_n = std::min((int)noc_links.size(), n);
+
+    return std::vector<NocLink>{noc_links.begin(), noc_links.begin() + pick_n};
+}
+
 void allocate_and_load_noc_placement_structs(void) {
     auto& noc_ctx = g_vpr_ctx.noc();
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index d1e39bd32f7..c3e9d213e09 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -19,13 +19,15 @@ constexpr double MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST = 1.;
 // we expect the latency costs to be in the pico-second range, and we don't expect it to go lower than that. So if the latency costs go below the pico-second range we trim the normalization value to be no higher than 1/ps
 // This should be updated if the delays become lower
 constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12;
-// the congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity
-// We assume that when a link congested, it is overloaded with at least 0.1% of its BW capacity
-constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3;
-
 // we don't expect the noc_latency cost to ever go below 1 pico second.
 // So this value represents the lowest possible latency cost.
 constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12;
+// the congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity
+// We assume that when a link is congested, it is overloaded with at least 0.1% of its BW capacity
+constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3;
+// If a link is overloaded by less than 0.1% of the link bandwidth capacity,
+// we assume it is not congested.
+constexpr double MIN_EXPECTED_NOC_CONGESTION_COST = 1.e-3;
 
 constexpr double INVALID_NOC_COST_TERM = -1.0;
 
@@ -383,6 +385,12 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs&
  */
 int get_number_of_traffic_flows_with_latency_cons_met(void);
 
+int get_number_of_congested_noc_links(void);
+
+std::vector<NocLink> get_top_n_congested_links(int n);
+
+std::vector<double> get_top_n_congestion_ratios(int n);
+
 /**
  * @brief There are a number of static datastructures which are local
  * to 'noc_place_utils.cpp'. THe purpose of these datastructures is
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b5d256d3d6b..4193283db09 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -830,7 +830,16 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
             costs.bb_cost, costs.timing_cost);
     if (noc_opts.noc) {
-        VTR_LOG("Initial noc placement costs. noc_aggregate_bandwidth_cost: %g, noc_latency_cost: %g, \n", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost);
+        VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, "
+            "noc_latency_cost: %g, "
+            "noc_latency_constraints_cost: %d, "
+            "noc_congestion_cost: %g, "
+            "n_congested_links: %d \n",
+            costs.noc_aggregate_bandwidth_cost,
+            costs.noc_latency_cost,
+            get_number_of_traffic_flows_with_latency_cons_met(),
+            costs.noc_congestion_cost_norm,
+            get_number_of_congested_noc_links());
     }
     if (placer_opts.place_algorithm.is_timing_driven()) {
         VTR_LOG(
@@ -864,7 +873,16 @@ void try_place(const Netlist<>& net_list,
             costs.cost, costs.bb_cost, costs.timing_cost, width_fac);
     if (noc_opts.noc) {
         sprintf(msg,
-                "\nInitial noc placement costs. noc_aggregate_bandwidth_cost: %g noc_latency_cost: %g ", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost);
+                "\nInitial NoC Placement Costs. noc_aggregate_bandwidth_cost: %g "
+                "noc_latency_cost: %g "
+                "noc_latency_constraints_cost: %d "
+                "noc_congestion_cost: %g "
+                "n_congested_links: %d",
+                costs.noc_aggregate_bandwidth_cost,
+                costs.noc_latency_cost,
+                get_number_of_traffic_flows_with_latency_cons_met(),
+                costs.noc_congestion_cost_norm,
+                get_number_of_congested_noc_links());
     }
     //Draw the initial placement
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
@@ -1173,8 +1191,27 @@ void try_place(const Netlist<>& net_list,
     // print the noc costs info
     if (noc_opts.noc) {
         sprintf(msg,
-                "\nNoC Placement Costs. noc_aggregate_bandwidth_cost: %g noc_latency_cost: %g noc_latency_constraints_cost: %d", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost, get_number_of_traffic_flows_with_latency_cons_met());
-        VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, noc_latency_cost: %g, noc_latency_constraints_cost: %d, \n", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost, get_number_of_traffic_flows_with_latency_cons_met());
+                "\nNoC Placement Costs. noc_aggregate_bandwidth_cost: %g "
+                "noc_latency_cost: %g "
+                "noc_latency_constraints_cost: %d "
+                "noc_congestion_cost: %g "
+                "n_congested_links: %d",
+                costs.noc_aggregate_bandwidth_cost,
+                costs.noc_latency_cost,
+                get_number_of_traffic_flows_with_latency_cons_met(),
+                costs.noc_congestion_cost_norm,
+                get_number_of_congested_noc_links());
+
+        VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, "
+            "noc_latency_cost: %g, "
+            "noc_latency_constraints_cost: %d, "
+            "noc_congestion_cost: %g, "
+            "n_congested_links: %d \n",
+            costs.noc_aggregate_bandwidth_cost,
+            costs.noc_latency_cost,
+            get_number_of_traffic_flows_with_latency_cons_met(),
+            costs.noc_congestion_cost_norm,
+            get_number_of_congested_noc_links());
     }
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
     // Print out swap statistics

From 017da60cabfdcffd1a962746d740bdb270d6dbe3 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 25 Jan 2024 18:50:41 -0500
Subject: [PATCH 14/41] Add get_total_congestion_bandwidth_ratio()

---
 vpr/src/place/noc_place_utils.cpp | 15 +++++++++++++++
 vpr/src/place/noc_place_utils.h   |  2 ++
 vpr/src/place/place.cpp           | 16 ++++++++++++----
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index d77d29c0455..11e18d89c1f 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -596,6 +596,21 @@ int get_number_of_congested_noc_links(void) {
     return num_congested_links;
 }
 
+double get_total_congestion_bandwidth_ratio(void) {
+    // Returns the sum of the congested bandwidth ratios of all NoC links
+    auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links();
+
+    double accum_congestion_ratio = 0.0;
+
+    // Iterate over all NoC links and accumulate each link's congested bandwidth ratio
+    for (const auto& link : noc_links) {
+      double congested_bw_ratio = link.get_congested_bandwidth_ratio();
+      accum_congestion_ratio += congested_bw_ratio;
+    }
+
+    return accum_congestion_ratio;
+}
+
 std::vector<NocLink> get_top_n_congested_links(int n) {
     // get NoC links
     vtr::vector<NocLinkId, NocLink> noc_links = g_vpr_ctx.noc().noc_model.get_noc_links();
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index c3e9d213e09..dd97f7d1bc6 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -387,6 +387,8 @@ int get_number_of_traffic_flows_with_latency_cons_met(void);
 
 int get_number_of_congested_noc_links(void);
 
+double get_total_congestion_bandwidth_ratio(void);
+
 std::vector<NocLink> get_top_n_congested_links(int n);
 
 std::vector<double> get_top_n_congestion_ratios(int n);
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4193283db09..28a922dbe3f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -834,11 +834,13 @@ void try_place(const Netlist<>& net_list,
             "noc_latency_cost: %g, "
             "noc_latency_constraints_cost: %d, "
             "noc_congestion_cost: %g, "
+            "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
             costs.noc_aggregate_bandwidth_cost,
             costs.noc_latency_cost,
             get_number_of_traffic_flows_with_latency_cons_met(),
-            costs.noc_congestion_cost_norm,
+            costs.noc_congestion_cost,
+            get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
     }
     if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -877,11 +879,13 @@ void try_place(const Netlist<>& net_list,
                 "noc_latency_cost: %g "
                 "noc_latency_constraints_cost: %d "
                 "noc_congestion_cost: %g "
+                "accum_congested_ratio: %g, "
                 "n_congested_links: %d",
                 costs.noc_aggregate_bandwidth_cost,
                 costs.noc_latency_cost,
                 get_number_of_traffic_flows_with_latency_cons_met(),
-                costs.noc_congestion_cost_norm,
+                costs.noc_congestion_cost,
+                get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
     }
     //Draw the initial placement
@@ -1195,22 +1199,26 @@ void try_place(const Netlist<>& net_list,
                 "noc_latency_cost: %g "
                 "noc_latency_constraints_cost: %d "
                 "noc_congestion_cost: %g "
+                "accum_congested_ratio: %g, "
                 "n_congested_links: %d",
                 costs.noc_aggregate_bandwidth_cost,
                 costs.noc_latency_cost,
                 get_number_of_traffic_flows_with_latency_cons_met(),
-                costs.noc_congestion_cost_norm,
+                costs.noc_congestion_cost,
+                get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
 
         VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, "
             "noc_latency_cost: %g, "
             "noc_latency_constraints_cost: %d, "
             "noc_congestion_cost: %g, "
+            "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
             costs.noc_aggregate_bandwidth_cost,
             costs.noc_latency_cost,
             get_number_of_traffic_flows_with_latency_cons_met(),
-            costs.noc_congestion_cost_norm,
+            costs.noc_congestion_cost,
+            get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
     }
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);

From 2ad4b697f42186df12ed91498db1e58444abfbe8 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 26 Jan 2024 12:55:04 -0500
Subject: [PATCH 15/41] Fix NoC test failure

Some NoC tests were failing due to newly added code for congestion modeling. This commit hopefully fixes them.
---
 vpr/src/place/noc_place_utils.cpp |  4 +++-
 vpr/test/test_noc_place_utils.cpp | 10 ++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 11e18d89c1f..525464b03e7 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -481,8 +481,10 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         }
     }
 
+    // if congestion cost is zero, we use a small cost for calculating the accepted error range
+    double non_zero_congestion_cost = (costs.noc_congestion_cost == 0) ? MIN_EXPECTED_NOC_CONGESTION_COST : costs.noc_congestion_cost;
     // check whether the NoC congestion cost is within the error range
-    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > costs.noc_congestion_cost * error_tolerance) {
+    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > non_zero_congestion_cost * error_tolerance) {
         VTR_LOG_ERROR(
             "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
             cost_check.congestion, costs.noc_congestion_cost);
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 0e5de07283e..87614fe64b9 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -471,6 +471,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // setting the NoC parameters
     noc_ctx.noc_model.set_noc_link_latency(1);
     noc_ctx.noc_model.set_noc_router_latency(1);
+    noc_ctx.noc_model.set_noc_link_bandwidth(1);
     // needs to be the same as above
     double router_latency = noc_ctx.noc_model.get_noc_router_latency();
     double link_latency = noc_ctx.noc_model.get_noc_link_latency();
@@ -1699,12 +1700,17 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         // we need to make the aggregate bandwidth cost and latency cost be a value that is larger or smaller than the tolerance value
         costs.noc_aggregate_bandwidth_cost += (costs.noc_aggregate_bandwidth_cost * error_tolerance * 2);
         costs.noc_latency_cost -= (costs.noc_latency_cost * error_tolerance * 2);
+        if (costs.noc_congestion_cost == 0) {
+            costs.noc_congestion_cost += MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2;
+        } else {
+            costs.noc_congestion_cost += costs.noc_congestion_cost * error_tolerance * 2;
+        }
 
         // run the test function
         int error = check_noc_placement_costs(costs, error_tolerance, noc_opts);
 
-        // we expect error to be 2 here, meaning the found costs are not within the tolerance range
-        REQUIRE(error == 2);
+        // we expect error to be 3 here, meaning the found costs are not within the tolerance range
+        REQUIRE(error == 3);
     }
     // need to delete local noc routing algorithm
     delete routing_algorithm;

From 41af9bcd3c9628ecffede6256ced1f1c8ed3cad9 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 26 Jan 2024 13:47:52 -0500
Subject: [PATCH 16/41] Remove init_chan() call

---
 vpr/src/base/place_and_route.cpp | 25 +++++++++++--------------
 vpr/src/base/place_and_route.h   |  4 +++-
 vpr/src/place/place.cpp          | 14 ++------------
 3 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index b1916852a34..186193744ce 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -5,6 +5,7 @@
 #include <climits>
 #include <cstdlib>
 #include <cmath>
+#include <algorithm>
 
 #include "vtr_util.h"
 #include "vtr_memory.h"
@@ -425,7 +426,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
  * is used to determine if the channel width should be rounded to an
  * even number.
  */
-t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_type graph_directionality) {
+t_chan_width init_chan(int cfactor, const t_chan_width_dist& chan_width_dist, t_graph_type graph_directionality) {
     auto& device_ctx = g_vpr_ctx.mutable_device();
     auto& grid = device_ctx.grid;
 
@@ -460,19 +461,15 @@ t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_t
         }
     }
 
-    chan_width.max = 0;
-    chan_width.x_max = chan_width.y_max = INT_MIN;
-    chan_width.x_min = chan_width.y_min = INT_MAX;
-    for (size_t i = 0; i < grid.height(); ++i) {
-        chan_width.x_max = std::max(chan_width.x_max, chan_width.x_list[i]);
-        chan_width.x_min = std::min(chan_width.x_min, chan_width.x_list[i]);
-    }
-    chan_width.max = std::max(chan_width.max, chan_width.x_max);
-    for (size_t i = 0; i < grid.width(); ++i) {
-        chan_width.y_max = std::max(chan_width.y_max, chan_width.y_list[i]);
-        chan_width.y_min = std::min(chan_width.y_min, chan_width.y_list[i]);
-    }
-    chan_width.max = std::max(chan_width.max, chan_width.y_max);
+    auto minmax = std::minmax_element(chan_width.x_list.begin(), chan_width.x_list.end());
+    chan_width.x_min = *minmax.first;
+    chan_width.x_max = *minmax.second;
+
+    minmax = std::minmax_element(chan_width.y_list.begin(), chan_width.y_list.end());
+    chan_width.y_min = *minmax.first;
+    chan_width.y_max = *minmax.second;
+
+    chan_width.max = std::max(chan_width.x_max, chan_width.y_max);
 
 #ifdef VERBOSE
     VTR_LOG("\n");
diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h
index 3ec8ca9030c..7a59fa02795 100644
--- a/vpr/src/base/place_and_route.h
+++ b/vpr/src/base/place_and_route.h
@@ -40,7 +40,9 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                                   std::shared_ptr<RoutingDelayCalculator> delay_calc,
                                   bool is_flat);
 
-t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_type graph_directionality);
+t_chan_width init_chan(int cfactor,
+                       const t_chan_width_dist& chan_width_dist,
+                       t_graph_type graph_directionality);
 
 void post_place_sync();
 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 28a922dbe3f..b66017c4fc6 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -577,7 +577,7 @@ void try_place(const Netlist<>& net_list,
 
     auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
-    int tot_iter, moves_since_cost_recompute, width_fac, num_connections,
+    int tot_iter, moves_since_cost_recompute, num_connections,
         outer_crit_iter_count, inner_recompute_limit;
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
@@ -592,7 +592,6 @@ void try_place(const Netlist<>& net_list,
     t_placer_statistics stats;
 
     t_placement_checkpoint placement_checkpoint;
-    t_graph_type graph_directionality;
 
     std::shared_ptr<SetupTimingInfo> timing_info;
     std::shared_ptr<PlacementDelayCalculator> placement_delay_calc;
@@ -650,16 +649,6 @@ void try_place(const Netlist<>& net_list,
     //create the move generator based on the chosen strategy
     create_move_generators(move_generator, move_generator2, placer_opts, move_lim);
 
-    width_fac = placer_opts.place_chan_width;
-
-    if (router_opts.route_type == GLOBAL) {
-        graph_directionality = GRAPH_BIDIR;
-    } else {
-        graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
-    }
-
-    init_chan(width_fac, chan_width_dist, graph_directionality);
-
     alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs);
 
     vtr::ScopedStartFinishTimer timer("Placement");
@@ -692,6 +681,7 @@ void try_place(const Netlist<>& net_list,
         place_sync_external_block_connections(block_id);
     }
 
+    const int width_fac = placer_opts.place_chan_width;
     init_draw_coords((float)width_fac);
 
     /* Allocated here because it goes into timing critical code where each memory allocation is expensive */

From e9a27b4273cabfec8fbb20300262caab573db33b Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 26 Jan 2024 17:15:37 -0500
Subject: [PATCH 17/41] Update normalization factors during NoC initial
 placement

---
 vpr/src/place/initial_noc_placement.cpp | 69 ++++++++++++++++++++++++-
 vpr/src/place/place.cpp                 |  2 +-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index d0aaa573bf7..aec088a1619 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -41,6 +41,16 @@ static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_rout
  */
 static void noc_routers_anneal(const t_noc_opts& noc_opts);
 
+/**
+ * @brief Check whether normalization factors need to be updated.
+ *
+ *   @param costs Most recent NoC cost terms.
+ *   @param old_costs NoC cost terms from the last time normalization factors were updated.
+ *   @return True if any NoC cost term differs significantly from its value in old_costs.
+ */
+static bool is_renormalization_needed(const t_placer_costs& costs,
+                                      const t_placer_costs& old_costs);
+
 static bool accept_noc_swap(double delta_cost, double prob) {
     if (delta_cost <= 0.0) {
         return true;
@@ -58,6 +68,40 @@ static bool accept_noc_swap(double delta_cost, double prob) {
     }
 }
 
+static bool is_renormalization_needed(const t_placer_costs& costs,
+                                      const t_placer_costs& old_costs) {
+    constexpr double COST_DIFF_TOLERANCE = 0.1;
+    bool renormalization_needed = false;
+    double cost_diff;
+    // Each NoC cost term is compared with its value in old_costs; any significant change sets the flag
+    cost_diff = fabs(costs.noc_aggregate_bandwidth_cost - old_costs.noc_aggregate_bandwidth_cost);
+    // aggregate bandwidth cost has changed by more than COST_DIFF_TOLERANCE of its current value
+    if (cost_diff > costs.noc_aggregate_bandwidth_cost * COST_DIFF_TOLERANCE) {
+        renormalization_needed = true;
+    }
+
+    cost_diff = (fabs(costs.noc_latency_cost - old_costs.noc_latency_cost));
+    // if latency cost only considers latency constraints, it might become zero
+    // a transition from or to zero cost necessitates renormalization; otherwise use the relative tolerance
+    if ((costs.noc_latency_cost == 0.0 && old_costs.noc_latency_cost != 0.0) ||
+        (costs.noc_latency_cost != 0.0 && old_costs.noc_latency_cost == 0.0)) {
+        renormalization_needed = true;
+    } else if (cost_diff > costs.noc_latency_cost * COST_DIFF_TOLERANCE) {
+        renormalization_needed = true;
+    }
+
+    cost_diff = (fabs(costs.noc_congestion_cost - old_costs.noc_congestion_cost));
+    // a transition from or to zero cost necessitates renormalization; otherwise use the relative tolerance
+    if ((costs.noc_congestion_cost == 0.0 && old_costs.noc_congestion_cost != 0.0) ||
+        (costs.noc_congestion_cost != 0.0 && old_costs.noc_congestion_cost == 0.0)) {
+        renormalization_needed = true;
+    } else if (cost_diff > costs.noc_congestion_cost * COST_DIFF_TOLERANCE) {
+        renormalization_needed = true;
+    }
+
+    return renormalization_needed;
+}
+
 static void place_constrained_noc_router(ClusterBlockId router_blk_id) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();
@@ -156,6 +200,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
 
     // Only NoC related costs are considered
     t_placer_costs costs;
+    // NoC costs from the last time normalization factors were updated
+    t_placer_costs old_costs;
 
     // Initialize NoC-related costs
     costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
@@ -163,6 +209,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
     update_noc_normalization_factors(costs);
     costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
+    old_costs = costs;
 
     // Maximum distance in each direction that a router can travel in a move
     // It is assumed that NoC routers are organized in a square grid.
@@ -179,11 +226,16 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     // the constant factor above 35000.
     // Get all the router clusters and figure out how many of them exist
     const int num_router_clusters = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist().size();
-    const int N_MOVES = num_router_clusters * 35000;
+    const int N_MOVES_PER_ROUTER = 35000;
+    const int N_MOVES = num_router_clusters * N_MOVES_PER_ROUTER;
+
+    const int RENORMALIZATION_LIM = 1024;
+    int renormalization_cnt = 0;
 
     const double starting_prob = 0.5;
     const double prob_step = starting_prob / N_MOVES;
 
+
     // The checkpoint stored the placement with the lowest cost.
     NoCPlacementCheckpoint checkpoint;
 
@@ -199,6 +251,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
      * Range limit and the probability of accepting swaps with positive delta cost
      * decrease linearly as more swaps are evaluated. Late in the annealing,
      * NoC routers are swapped only with their neighbors as the range limit approaches 1.
+     *
+     * After every RENORMALIZATION_LIM accepted moves, if NoC cost terms have changed
+     * significantly, the normalization factors are updated and the total cost is re-computed.
      */
 
     // Generate and evaluate router moves
@@ -224,9 +279,21 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 commit_move_blocks(blocks_affected);
                 commit_noc_costs();
                 costs += noc_delta_c;
+                // check if the current placement is better than the stored checkpoint
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
+
+                renormalization_cnt++;
+                if (renormalization_cnt == RENORMALIZATION_LIM) {
+                    renormalization_cnt = 0;
+                    if (is_renormalization_needed(costs, old_costs)) {
+                        update_noc_normalization_factors(costs);
+                        costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
+                        old_costs = costs;
+                    }
+                }
+
             } else { // The proposed move is rejected
                 revert_move_blocks(blocks_affected);
                 revert_noc_traffic_flow_routes(blocks_affected);
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b66017c4fc6..5c4d5232e4b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -999,7 +999,7 @@ void try_place(const Netlist<>& net_list,
                 }
             }
 
-            //move the appropoiate move_generator to be the current used move generator
+            //move the appropriate move_generator to be the current used move generator
             assign_current_move_generator(move_generator, move_generator2,
                                           agent_state, placer_opts, false, current_move_generator);
 

From f7731d2a4350b84aab5ae281b7676b0a127029bb Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 26 Jan 2024 17:21:18 -0500
Subject: [PATCH 18/41] pass strings by reference

---
 libs/libarchfpga/src/physical_types_util.h |  2 +-
 vpr/src/base/read_options.cpp              | 66 +++++++++++-----------
 vpr/src/base/vpr_api.cpp                   |  8 +--
 vpr/src/base/vpr_api.h                     |  2 +-
 vpr/src/util/vpr_utils.cpp                 | 14 ++---
 vpr/src/util/vpr_utils.h                   |  8 +--
 6 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h
index e27ba096b54..4d9c3013682 100644
--- a/libs/libarchfpga/src/physical_types_util.h
+++ b/libs/libarchfpga/src/physical_types_util.h
@@ -171,7 +171,7 @@ std::vector<std::string> block_type_class_index_to_pin_names(t_physical_tile_typ
                                                              bool is_flat);
 
 ///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found)
-t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector<t_physical_tile_type>& types);
+t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector<t_physical_tile_type>& types);
 
 int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type);
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 971f92b5071..2a0454612b6 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -62,7 +62,7 @@ struct ParseOnOff {
 };
 
 struct ParseArchFormat {
-    ConvertedValue<e_arch_format> from_str(std::string str) {
+    ConvertedValue<e_arch_format> from_str(const std::string& str) {
         ConvertedValue<e_arch_format> conv_value;
         if (str == "vtr")
             conv_value.set_value(e_arch_format::VTR);
@@ -94,7 +94,7 @@ struct ParseArchFormat {
     }
 };
 struct ParseCircuitFormat {
-    ConvertedValue<e_circuit_format> from_str(std::string str) {
+    ConvertedValue<e_circuit_format> from_str(const std::string& str) {
         ConvertedValue<e_circuit_format> conv_value;
         if (str == "auto")
             conv_value.set_value(e_circuit_format::AUTO);
@@ -134,7 +134,7 @@ struct ParseCircuitFormat {
     }
 };
 struct ParseRoutePredictor {
-    ConvertedValue<e_routing_failure_predictor> from_str(std::string str) {
+    ConvertedValue<e_routing_failure_predictor> from_str(const std::string& str) {
         ConvertedValue<e_routing_failure_predictor> conv_value;
         if (str == "safe")
             conv_value.set_value(SAFE);
@@ -170,7 +170,7 @@ struct ParseRoutePredictor {
 };
 
 struct ParseRouterAlgorithm {
-    ConvertedValue<e_router_algorithm> from_str(std::string str) {
+    ConvertedValue<e_router_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_router_algorithm> conv_value;
         if (str == "parallel")
             conv_value.set_value(PARALLEL);
@@ -201,7 +201,7 @@ struct ParseRouterAlgorithm {
 };
 
 struct ParseNodeReorderAlgorithm {
-    ConvertedValue<e_rr_node_reorder_algorithm> from_str(std::string str) {
+    ConvertedValue<e_rr_node_reorder_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_rr_node_reorder_algorithm> conv_value;
         if (str == "none")
             conv_value.set_value(DONT_REORDER);
@@ -236,7 +236,7 @@ struct ParseNodeReorderAlgorithm {
 };
 
 struct RouteBudgetsAlgorithm {
-    ConvertedValue<e_routing_budgets_algorithm> from_str(std::string str) {
+    ConvertedValue<e_routing_budgets_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_routing_budgets_algorithm> conv_value;
         if (str == "minimax")
             conv_value.set_value(MINIMAX);
@@ -276,7 +276,7 @@ struct RouteBudgetsAlgorithm {
 };
 
 struct ParseRouteType {
-    ConvertedValue<e_route_type> from_str(std::string str) {
+    ConvertedValue<e_route_type> from_str(const std::string& str) {
         ConvertedValue<e_route_type> conv_value;
         if (str == "global")
             conv_value.set_value(GLOBAL);
@@ -307,7 +307,7 @@ struct ParseRouteType {
 };
 
 struct ParseBaseCost {
-    ConvertedValue<e_base_cost_type> from_str(std::string str) {
+    ConvertedValue<e_base_cost_type> from_str(const std::string& str) {
         ConvertedValue<e_base_cost_type> conv_value;
         if (str == "delay_normalized")
             conv_value.set_value(DELAY_NORMALIZED);
@@ -358,7 +358,7 @@ struct ParseBaseCost {
 };
 
 struct ParsePlaceDeltaDelayAlgorithm {
-    ConvertedValue<e_place_delta_delay_algorithm> from_str(std::string str) {
+    ConvertedValue<e_place_delta_delay_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_place_delta_delay_algorithm> conv_value;
         if (str == "astar")
             conv_value.set_value(e_place_delta_delay_algorithm::ASTAR_ROUTE);
@@ -389,7 +389,7 @@ struct ParsePlaceDeltaDelayAlgorithm {
 };
 
 struct ParsePlaceAlgorithm {
-    ConvertedValue<e_place_algorithm> from_str(std::string str) {
+    ConvertedValue<e_place_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_place_algorithm> conv_value;
         if (str == "bounding_box") {
             conv_value.set_value(BOUNDING_BOX_PLACE);
@@ -431,7 +431,7 @@ struct ParsePlaceAlgorithm {
 };
 
 struct ParsePlaceBoundingBox {
-    ConvertedValue<e_place_bounding_box_mode> from_str(std::string str) {
+    ConvertedValue<e_place_bounding_box_mode> from_str(const std::string& str) {
         ConvertedValue<e_place_bounding_box_mode> conv_value;
         if (str == "auto_bb") {
             conv_value.set_value(AUTO_BB);
@@ -466,7 +466,7 @@ struct ParsePlaceBoundingBox {
 };
 
 struct ParsePlaceAgentAlgorithm {
-    ConvertedValue<e_agent_algorithm> from_str(std::string str) {
+    ConvertedValue<e_agent_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_agent_algorithm> conv_value;
         if (str == "e_greedy")
             conv_value.set_value(E_GREEDY);
@@ -497,7 +497,7 @@ struct ParsePlaceAgentAlgorithm {
 };
 
 struct ParsePlaceAgentSpace {
-    ConvertedValue<e_agent_space> from_str(std::string str) {
+    ConvertedValue<e_agent_space> from_str(const std::string& str) {
         ConvertedValue<e_agent_space> conv_value;
         if (str == "move_type")
             conv_value.set_value(e_agent_space::MOVE_TYPE);
@@ -528,7 +528,7 @@ struct ParsePlaceAgentSpace {
 };
 
 struct ParseFixPins {
-    ConvertedValue<e_pad_loc_type> from_str(std::string str) {
+    ConvertedValue<e_pad_loc_type> from_str(const std::string& str) {
         ConvertedValue<e_pad_loc_type> conv_value;
         if (str == "free")
             conv_value.set_value(FREE);
@@ -559,7 +559,7 @@ struct ParseFixPins {
 };
 
 struct ParseClusterSeed {
-    ConvertedValue<e_cluster_seed> from_str(std::string str) {
+    ConvertedValue<e_cluster_seed> from_str(const std::string& str) {
         ConvertedValue<e_cluster_seed> conv_value;
         if (str == "timing")
             conv_value.set_value(e_cluster_seed::TIMING);
@@ -606,7 +606,7 @@ struct ParseClusterSeed {
 };
 
 struct ParseConstantNetMethod {
-    ConvertedValue<e_constant_net_method> from_str(std::string str) {
+    ConvertedValue<e_constant_net_method> from_str(const std::string& str) {
         ConvertedValue<e_constant_net_method> conv_value;
         if (str == "global")
             conv_value.set_value(CONSTANT_NET_GLOBAL);
@@ -637,7 +637,7 @@ struct ParseConstantNetMethod {
 };
 
 struct ParseTimingReportDetail {
-    ConvertedValue<e_timing_report_detail> from_str(std::string str) {
+    ConvertedValue<e_timing_report_detail> from_str(const std::string& str) {
         ConvertedValue<e_timing_report_detail> conv_value;
         if (str == "netlist")
             conv_value.set_value(e_timing_report_detail::NETLIST);
@@ -677,7 +677,7 @@ struct ParseTimingReportDetail {
 };
 
 struct ParseClockModeling {
-    ConvertedValue<e_clock_modeling> from_str(std::string str) {
+    ConvertedValue<e_clock_modeling> from_str(const std::string& str) {
         ConvertedValue<e_clock_modeling> conv_value;
         if (str == "ideal")
             conv_value.set_value(IDEAL_CLOCK);
@@ -715,7 +715,7 @@ struct ParseClockModeling {
 };
 
 struct ParseUnrelatedClustering {
-    ConvertedValue<e_unrelated_clustering> from_str(std::string str) {
+    ConvertedValue<e_unrelated_clustering> from_str(const std::string& str) {
         ConvertedValue<e_unrelated_clustering> conv_value;
         if (str == "on")
             conv_value.set_value(e_unrelated_clustering::ON);
@@ -753,7 +753,7 @@ struct ParseUnrelatedClustering {
 };
 
 struct ParseBalanceBlockTypeUtil {
-    ConvertedValue<e_balance_block_type_util> from_str(std::string str) {
+    ConvertedValue<e_balance_block_type_util> from_str(const std::string& str) {
         ConvertedValue<e_balance_block_type_util> conv_value;
         if (str == "on")
             conv_value.set_value(e_balance_block_type_util::ON);
@@ -791,7 +791,7 @@ struct ParseBalanceBlockTypeUtil {
 };
 
 struct ParseConstGenInference {
-    ConvertedValue<e_const_gen_inference> from_str(std::string str) {
+    ConvertedValue<e_const_gen_inference> from_str(const std::string& str) {
         ConvertedValue<e_const_gen_inference> conv_value;
         if (str == "none")
             conv_value.set_value(e_const_gen_inference::NONE);
@@ -829,7 +829,7 @@ struct ParseConstGenInference {
 };
 
 struct ParseIncrRerouteDelayRipup {
-    ConvertedValue<e_incr_reroute_delay_ripup> from_str(std::string str) {
+    ConvertedValue<e_incr_reroute_delay_ripup> from_str(const std::string& str) {
         ConvertedValue<e_incr_reroute_delay_ripup> conv_value;
         if (str == "on")
             conv_value.set_value(e_incr_reroute_delay_ripup::ON);
@@ -867,7 +867,7 @@ struct ParseIncrRerouteDelayRipup {
 };
 
 struct ParseRouteBBUpdate {
-    ConvertedValue<e_route_bb_update> from_str(std::string str) {
+    ConvertedValue<e_route_bb_update> from_str(const std::string& str) {
         ConvertedValue<e_route_bb_update> conv_value;
         if (str == "static")
             conv_value.set_value(e_route_bb_update::STATIC);
@@ -901,7 +901,7 @@ struct ParseRouteBBUpdate {
 };
 
 struct ParseRouterLookahead {
-    ConvertedValue<e_router_lookahead> from_str(std::string str) {
+    ConvertedValue<e_router_lookahead> from_str(const std::string& str) {
         ConvertedValue<e_router_lookahead> conv_value;
         if (str == "classic")
             conv_value.set_value(e_router_lookahead::CLASSIC);
@@ -939,7 +939,7 @@ struct ParseRouterLookahead {
 };
 
 struct ParsePlaceDelayModel {
-    ConvertedValue<PlaceDelayModelType> from_str(std::string str) {
+    ConvertedValue<PlaceDelayModelType> from_str(const std::string& str) {
         ConvertedValue<PlaceDelayModelType> conv_value;
         if (str == "delta")
             conv_value.set_value(PlaceDelayModelType::DELTA);
@@ -973,7 +973,7 @@ struct ParsePlaceDelayModel {
 };
 
 struct ParseReducer {
-    ConvertedValue<e_reducer> from_str(std::string str) {
+    ConvertedValue<e_reducer> from_str(const std::string& str) {
         ConvertedValue<e_reducer> conv_value;
         if (str == "min")
             conv_value.set_value(e_reducer::MIN);
@@ -1016,7 +1016,7 @@ struct ParseReducer {
 };
 
 struct ParseRouterFirstIterTiming {
-    ConvertedValue<e_router_initial_timing> from_str(std::string str) {
+    ConvertedValue<e_router_initial_timing> from_str(const std::string& str) {
         ConvertedValue<e_router_initial_timing> conv_value;
         if (str == "all_critical")
             conv_value.set_value(e_router_initial_timing::ALL_CRITICAL);
@@ -1047,7 +1047,7 @@ struct ParseRouterFirstIterTiming {
 };
 
 struct ParseRouterHeap {
-    ConvertedValue<e_heap_type> from_str(std::string str) {
+    ConvertedValue<e_heap_type> from_str(const std::string& str) {
         ConvertedValue<e_heap_type> conv_value;
         if (str == "binary")
             conv_value.set_value(e_heap_type::BINARY_HEAP);
@@ -1078,7 +1078,7 @@ struct ParseRouterHeap {
 };
 
 struct ParseCheckRoute {
-    ConvertedValue<e_check_route_option> from_str(std::string str) {
+    ConvertedValue<e_check_route_option> from_str(const std::string& str) {
         ConvertedValue<e_check_route_option> conv_value;
         if (str == "off")
             conv_value.set_value(e_check_route_option::OFF);
@@ -1113,7 +1113,7 @@ struct ParseCheckRoute {
 };
 
 struct ParsePlaceEfforScaling {
-    ConvertedValue<e_place_effort_scaling> from_str(std::string str) {
+    ConvertedValue<e_place_effort_scaling> from_str(const std::string& str) {
         ConvertedValue<e_place_effort_scaling> conv_value;
         if (str == "circuit")
             conv_value.set_value(e_place_effort_scaling::CIRCUIT);
@@ -1144,7 +1144,7 @@ struct ParsePlaceEfforScaling {
 };
 
 struct ParseTimingUpdateType {
-    ConvertedValue<e_timing_update_type> from_str(std::string str) {
+    ConvertedValue<e_timing_update_type> from_str(const std::string& str) {
         ConvertedValue<e_timing_update_type> conv_value;
         if (str == "auto")
             conv_value.set_value(e_timing_update_type::AUTO);
@@ -1179,7 +1179,7 @@ struct ParseTimingUpdateType {
 };
 
 struct ParsePostSynthNetlistUnconnInputHandling {
-    ConvertedValue<e_post_synth_netlist_unconn_handling> from_str(std::string str) {
+    ConvertedValue<e_post_synth_netlist_unconn_handling> from_str(const std::string& str) {
         ConvertedValue<e_post_synth_netlist_unconn_handling> conv_value;
         if (str == "unconnected")
             conv_value.set_value(e_post_synth_netlist_unconn_handling::UNCONNECTED);
@@ -1218,7 +1218,7 @@ struct ParsePostSynthNetlistUnconnInputHandling {
 };
 
 struct ParsePostSynthNetlistUnconnOutputHandling {
-    ConvertedValue<e_post_synth_netlist_unconn_handling> from_str(std::string str) {
+    ConvertedValue<e_post_synth_netlist_unconn_handling> from_str(const std::string& str) {
         ConvertedValue<e_post_synth_netlist_unconn_handling> conv_value;
         if (str == "unconnected")
             conv_value.set_value(e_post_synth_netlist_unconn_handling::UNCONNECTED);
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index cc5d23343de..47733286088 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -251,7 +251,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a
      * Initialize the functions names for which VPR_ERRORs
      * are demoted to VTR_LOG_WARNs
      */
-    for (std::string func_name : vtr::split(options->disable_errors, std::string(":"))) {
+    for (const std::string& func_name : vtr::split(options->disable_errors, std::string(":"))) {
         map_error_activation_status(func_name);
     }
 
@@ -272,7 +272,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a
     }
 
     set_noisy_warn_log_file(warn_log_file);
-    for (std::string func_name : vtr::split(warn_functions, std::string(":"))) {
+    for (const std::string& func_name : vtr::split(warn_functions, std::string(":"))) {
         add_warnings_to_suppress(func_name);
     }
 
@@ -559,7 +559,7 @@ void vpr_setup_noc(const t_vpr_setup& vpr_setup, const t_arch& arch) {
  * @param noc_routing_algorithm_name A user provided string that identifies a
  * NoC routing algorithm
  */
-void vpr_setup_noc_routing_algorithm(std::string noc_routing_algorithm_name) {
+void vpr_setup_noc_routing_algorithm(const std::string& noc_routing_algorithm_name) {
     // Need to be abke to modify the NoC context, since we will be adding the
     // newly created routing algorithm to it
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
@@ -1085,7 +1085,7 @@ static void get_intercluster_switch_fanin_estimates(const t_vpr_setup& vpr_setup
 
     auto type = find_most_common_tile_type(grid);
     /* get Fc_in/out for most common block (e.g. logic blocks) */
-    VTR_ASSERT(type->fc_specs.size() > 0);
+    VTR_ASSERT(!type->fc_specs.empty());
 
     //Estimate the maximum Fc_in/Fc_out
 
diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h
index 15509be1115..b4c89e25051 100644
--- a/vpr/src/base/vpr_api.h
+++ b/vpr/src/base/vpr_api.h
@@ -144,7 +144,7 @@ void vpr_close_graphics(const t_vpr_setup& vpr_setup);
 void vpr_setup_clock_networks(t_vpr_setup& vpr_setup, const t_arch& Arch);
 
 void vpr_setup_noc(const t_vpr_setup& vpr_setup, const t_arch& arch);
-void vpr_setup_noc_routing_algorithm(std::string noc_routing_algorithm_name);
+void vpr_setup_noc_routing_algorithm(const std::string& noc_routing_algorithm_name);
 
 void vpr_free_vpr_data_structures(t_arch& Arch, t_vpr_setup& vpr_setup);
 void vpr_free_all(t_arch& Arch,
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index b200a06ba7f..8c1faaafdff 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -3,6 +3,7 @@
 #include <regex>
 #include <algorithm>
 #include <sstream>
+#include <string.h>
 
 #include "vtr_assert.h"
 #include "vtr_log.h"
@@ -17,7 +18,6 @@
 #include "vpr_utils.h"
 #include "cluster_placement.h"
 #include "place_macro.h"
-#include "string.h"
 #include "pack_types.h"
 #include "device_grid.h"
 #include "timing_fail_error.h"
@@ -181,7 +181,7 @@ void sync_grid_to_blocks() {
         }
 
         if (device_ctx.grid.get_width_offset({blk_x, blk_y, blk_layer}) != 0 || device_ctx.grid.get_height_offset({blk_x, blk_y, blk_layer}) != 0) {
-            VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d, %d).",
+            VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placement for cluster_ctx.blocks %lu at (%d, %d, %d, %d).",
                             size_t(blk_id), blk_x, blk_y, blk_z, blk_layer);
         }
 
@@ -675,7 +675,7 @@ void get_pin_range_for_block(const ClusterBlockId blk_id,
     *pin_high = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_high];
 }
 
-t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector<t_physical_tile_type>& types) {
+t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector<t_physical_tile_type>& types) {
     for (auto const& type : types) {
         if (type.name == name) {
             return &type;
@@ -814,7 +814,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid) {
     return max_type;
 }
 
-InstPort parse_inst_port(std::string str) {
+InstPort parse_inst_port(const std::string& str) {
     InstPort inst_port(str);
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -1172,7 +1172,7 @@ t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int
     return nullptr;
 }
 
-const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string port_name) {
+const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name) {
     const t_pb_graph_pin* gpin = find_pb_graph_pin(pb_gnode, port_name, 0);
 
     if (gpin != nullptr) {
@@ -1181,7 +1181,7 @@ const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string po
     return nullptr;
 }
 
-const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, std::string port_name, int index) {
+const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, const std::string& port_name, int index) {
     for (int iport = 0; iport < pb_gnode->num_input_ports; iport++) {
         if (pb_gnode->num_input_pins[iport] < index) continue;
 
@@ -2240,7 +2240,7 @@ void pretty_print_float(const char* prefix, double value, int num_digits, int sc
     }
 }
 
-void print_timing_stats(std::string name,
+void print_timing_stats(const std::string& name,
                         const t_timing_analysis_profile_info& current,
                         const t_timing_analysis_profile_info& past) {
     VTR_LOG("%s timing analysis took %g seconds (%g STA, %g slack) (%zu full updates: %zu setup, %zu hold, %zu combined).\n",
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index 75842967cd1..cdc1281cbca 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -153,10 +153,10 @@ std::vector<AtomPinId> find_clb_pin_sink_atom_pins(ClusterBlockId clb, int logic
 std::tuple<ClusterNetId, int, int> find_pb_route_clb_input_net_pin(ClusterBlockId clb, int sink_pb_route_id);
 
 //Returns the port matching name within pb_gnode
-const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string port_name);
+const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name);
 
 //Returns the graph pin matching name at pin index
-const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, std::string port_name, int index);
+const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, const std::string& port_name, int index);
 
 AtomPinId find_atom_pin(ClusterBlockId blk_id, const t_pb_graph_pin* pb_gpin);
 
@@ -168,7 +168,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid);
 
 //Parses a block_name.port[x:y] (e.g. LAB.data_in[3:10]) pin range specification, if no pin range is specified
 //looks-up the block port and fills in the full range
-InstPort parse_inst_port(std::string str);
+InstPort parse_inst_port(const std::string& str);
 
 //Returns the block type which is most likely the logic block
 t_logical_block_type_ptr infer_logic_block_type(const DeviceGrid& grid);
@@ -250,7 +250,7 @@ int max_pins_per_grid_tile();
 void pretty_print_uint(const char* prefix, size_t value, int num_digits, int scientific_precision);
 void pretty_print_float(const char* prefix, double value, int num_digits, int scientific_precision);
 
-void print_timing_stats(std::string name,
+void print_timing_stats(const std::string& name,
                         const t_timing_analysis_profile_info& current,
                         const t_timing_analysis_profile_info& past = t_timing_analysis_profile_info());
 

From 4a22e5b0efbe3b3b2d040102d82914d63724536e Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 29 Jan 2024 18:35:15 -0500
Subject: [PATCH 19/41] Print NoC metrics in print_place_status()

---
 vpr/src/place/noc_place_utils.cpp | 14 +++--
 vpr/src/place/place.cpp           | 96 ++++++++++++++++++++++---------
 2 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 525464b03e7..fe863e86acd 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -482,13 +482,15 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
     }
 
     // if congestion cost is zero, we use a small cost for calculating the accepted error range
-    double non_zero_congestion_cost = (costs.noc_congestion_cost == 0) ? MIN_EXPECTED_NOC_CONGESTION_COST : costs.noc_congestion_cost;
+    double non_zero_congestion_cost = (costs.noc_congestion_cost < MIN_EXPECTED_NOC_CONGESTION_COST) ? MIN_EXPECTED_NOC_CONGESTION_COST : costs.noc_congestion_cost;
     // check whether the NoC congestion cost is within the error range
-    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > non_zero_congestion_cost * error_tolerance) {
-        VTR_LOG_ERROR(
-            "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
-            cost_check.congestion, costs.noc_congestion_cost);
-        error++;
+    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > non_zero_congestion_cost){
+        if (!(cost_check.congestion < MIN_EXPECTED_NOC_CONGESTION_COST && costs.noc_congestion_cost < MIN_EXPECTED_NOC_CONGESTION_COST)) {
+            VTR_LOG_ERROR(
+                "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
+                cost_check.congestion, costs.noc_congestion_cost);
+            error++;
+        }
     }
 
     return error;
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 5c4d5232e4b..1f37581f7af 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -526,7 +526,7 @@ static void calculate_reward_and_process_outcome(
     float timing_bb_factor,
     MoveGenerator& move_generator);
 
-static void print_place_status_header();
+static void print_place_status_header(bool noc_enabled);
 
 static void print_place_status(const t_annealing_state& state,
                                const t_placer_statistics& stats,
@@ -534,7 +534,11 @@ static void print_place_status(const t_annealing_state& state,
                                float cpd,
                                float sTNS,
                                float sWNS,
-                               size_t tot_moves);
+                               size_t tot_moves,
+                               bool noc_enabled,
+                               float noc_agg_bw,
+                               float noc_agg_latency,
+                               float noc_cong);
 
 static void print_resources_utilization();
 
@@ -820,12 +824,15 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
             costs.bb_cost, costs.timing_cost);
     if (noc_opts.noc) {
-        VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, "
+        VTR_LOG("NoC Placement Costs. "
+            "noc cost: %g, "
+            "noc_aggregate_bandwidth_cost: %g, "
             "noc_latency_cost: %g, "
             "noc_latency_constraints_cost: %d, "
             "noc_congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
+            calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
             costs.noc_aggregate_bandwidth_cost,
             costs.noc_latency_cost,
             get_number_of_traffic_flows_with_latency_cons_met(),
@@ -865,12 +872,15 @@ void try_place(const Netlist<>& net_list,
             costs.cost, costs.bb_cost, costs.timing_cost, width_fac);
     if (noc_opts.noc) {
         sprintf(msg,
-                "\nInitial NoC Placement Costs. noc_aggregate_bandwidth_cost: %g "
+                "\nInitial NoC Placement Costs. "
+                "noc cost: %g, "
+                "noc_aggregate_bandwidth_cost: %g "
                 "noc_latency_cost: %g "
                 "noc_latency_constraints_cost: %d "
                 "noc_congestion_cost: %g "
                 "accum_congested_ratio: %g, "
                 "n_congested_links: %d",
+                calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
                 costs.noc_aggregate_bandwidth_cost,
                 costs.noc_latency_cost,
                 get_number_of_traffic_flows_with_latency_cons_met(),
@@ -973,7 +983,7 @@ void try_place(const Netlist<>& net_list,
     if (skip_anneal == false) {
         //Table header
         VTR_LOG("\n");
-        print_place_status_header();
+        print_place_status_header(noc_opts.noc);
 
         /* Outer loop of the simulated annealing begins */
         do {
@@ -1022,7 +1032,9 @@ void try_place(const Netlist<>& net_list,
             ++state.num_temps;
 
             print_place_status(state, stats, temperature_timer.elapsed_sec(),
-                               critical_path.delay(), sTNS, sWNS, tot_iter);
+                               critical_path.delay(), sTNS, sWNS, tot_iter,
+                               noc_opts.noc, costs.noc_aggregate_bandwidth_cost,
+                               costs.noc_latency_cost, costs.noc_congestion_cost);
 
             if (placer_opts.place_algorithm.is_timing_driven()
                 && placer_opts.place_agent_multistate
@@ -1093,7 +1105,9 @@ void try_place(const Netlist<>& net_list,
         }
 
         print_place_status(state, stats, temperature_timer.elapsed_sec(),
-                           critical_path.delay(), sTNS, sWNS, tot_iter);
+                           critical_path.delay(), sTNS, sWNS, tot_iter,
+                           noc_opts.noc, costs.noc_aggregate_bandwidth_cost,
+                           costs.noc_latency_cost, costs.noc_congestion_cost);
     }
     auto post_quench_timing_stats = timing_ctx.stats;
 
@@ -1185,12 +1199,15 @@ void try_place(const Netlist<>& net_list,
     // print the noc costs info
     if (noc_opts.noc) {
         sprintf(msg,
-                "\nNoC Placement Costs. noc_aggregate_bandwidth_cost: %g "
+                "\nNoC Placement Costs. "
+                "noc cost: %g, "
+                "noc_aggregate_bandwidth_cost: %g "
                 "noc_latency_cost: %g "
                 "noc_latency_constraints_cost: %d "
                 "noc_congestion_cost: %g "
                 "accum_congested_ratio: %g, "
                 "n_congested_links: %d",
+                calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
                 costs.noc_aggregate_bandwidth_cost,
                 costs.noc_latency_cost,
                 get_number_of_traffic_flows_with_latency_cons_met(),
@@ -1198,12 +1215,15 @@ void try_place(const Netlist<>& net_list,
                 get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
 
-        VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, "
+        VTR_LOG("\nNoC Placement Costs. "
+            "noc cost: %g, "
+            "noc_aggregate_bandwidth_cost: %g "
             "noc_latency_cost: %g, "
             "noc_latency_constraints_cost: %d, "
             "noc_congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
+            calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
             costs.noc_aggregate_bandwidth_cost,
             costs.noc_latency_cost,
             get_number_of_traffic_flows_with_latency_cons_met(),
@@ -2314,7 +2334,7 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
     }
 
     if (noc_opts.noc) {
-        // in noc mode we include noc agggregate bandwidth and noc latency
+        // in noc mode we include noc aggregate bandwidth and noc latency
         total_cost += calculate_noc_cost(NocCostTerms(*costs), *costs, noc_opts);
     }
 
@@ -4195,15 +4215,27 @@ static void update_screen_debug() {
 }
 #endif
 
-static void print_place_status_header() {
-    VTR_LOG(
-        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
-    VTR_LOG(
-        "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha\n");
-    VTR_LOG(
-        "      (sec)                                          (ns)       (ns)     (ns)                                                 \n");
-    VTR_LOG(
-        "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+static void print_place_status_header(bool noc_enabled) {
+    if (!noc_enabled) {
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+        VTR_LOG(
+            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha\n");
+        VTR_LOG(
+            "      (sec)                                          (ns)       (ns)     (ns)                                                 \n");
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
+    } else {
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- ---------\n");
+        VTR_LOG(
+            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha Agg. BW  Agg. Lat NoC Cong.\n");
+        VTR_LOG(
+            "      (sec)                                          (ns)       (ns)     (ns)                                                   (bps)     (ns)            \n");
+        VTR_LOG(
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- ---------\n");
+    }
+
 }
 
 static void print_place_status(const t_annealing_state& state,
@@ -4212,22 +4244,32 @@ static void print_place_status(const t_annealing_state& state,
                                float cpd,
                                float sTNS,
                                float sWNS,
-                               size_t tot_moves) {
+                               size_t tot_moves,
+                               bool noc_enabled,
+                               float noc_agg_bw,
+                               float noc_agg_latency,
+                               float noc_cong) {
     VTR_LOG(
-        "%4zu "
-        "%6.1f "
-        "%7.1e "
+        "%4zu %6.1f %7.1e "
         "%7.3f %10.2f %-10.5g "
         "%7.3f % 10.3g % 8.3f "
         "%7.3f %7.4f %6.1f %8.2f",
         state.num_temps, elapsed_sec, state.t,
-        stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, 1e9 * cpd,
-        1e9 * sTNS, 1e9 * sWNS, stats.success_rate, stats.std_dev,
-        state.rlim, state.crit_exponent);
+        stats.av_cost, stats.av_bb_cost, stats.av_timing_cost,
+        1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS,
+        stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent);
 
     pretty_print_uint(" ", tot_moves, 9, 3);
 
-    VTR_LOG(" %6.3f\n", state.alpha);
+    VTR_LOG(" %6.3f", state.alpha);
+
+    if (noc_enabled) {
+        VTR_LOG(
+            " %7.2e %7.2e %8.2f",
+            noc_agg_bw, noc_agg_latency, noc_cong);
+    }
+
+    VTR_LOG("\n");
     fflush(stdout);
 }
 

From 5726f98c45a48f22950431f0fcb619ab32dde903 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 29 Jan 2024 18:38:03 -0500
Subject: [PATCH 20/41] Revert renormalization in initial NoC placement

---
 vpr/src/place/initial_noc_placement.cpp | 61 -------------------------
 1 file changed, 61 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index aec088a1619..f5278123a4f 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -41,16 +41,6 @@ static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_rout
  */
 static void noc_routers_anneal(const t_noc_opts& noc_opts);
 
-/**
- * @brief Check whether normalization factors need to be updated.
- *
- *   @param costs Most recent NoC cost terms.
- *   @param old_costs NoC cost terms from the last time normalization
- *   factors were updated.
- */
-static bool is_renormalization_needed(const t_placer_costs& costs,
-                                      const t_placer_costs& old_costs);
-
 static bool accept_noc_swap(double delta_cost, double prob) {
     if (delta_cost <= 0.0) {
         return true;
@@ -68,40 +58,6 @@ static bool accept_noc_swap(double delta_cost, double prob) {
     }
 }
 
-static bool is_renormalization_needed(const t_placer_costs& costs,
-                                      const t_placer_costs& old_costs) {
-    constexpr double COST_DIFF_TOLERANCE = 0.1;
-    bool renormalization_needed = false;
-    double cost_diff;
-
-    cost_diff = fabs(costs.noc_aggregate_bandwidth_cost - old_costs.noc_aggregate_bandwidth_cost);
-    // aggregate bandwidth has changed significantly
-    if (cost_diff > costs.noc_aggregate_bandwidth_cost * COST_DIFF_TOLERANCE) {
-        renormalization_needed = true;
-    }
-
-    cost_diff = (fabs(costs.noc_latency_cost - old_costs.noc_latency_cost));
-    // if latency cost only considers latency constraints, it might become zero
-    // a transition from zero or to zero cost necessitates renormalization
-    if ((costs.noc_latency_cost == 0.0 && old_costs.noc_latency_cost != 0.0) ||
-        (costs.noc_latency_cost != 0.0 && old_costs.noc_latency_cost == 0.0)) {
-        renormalization_needed = true;
-    } else if (cost_diff > costs.noc_latency_cost * COST_DIFF_TOLERANCE) {
-        renormalization_needed = true;
-    }
-
-    cost_diff = (fabs(costs.noc_congestion_cost - old_costs.noc_congestion_cost));
-    // a transition from zero or to zero cost necessitates renormalization
-    if ((costs.noc_congestion_cost == 0.0 && old_costs.noc_congestion_cost != 0.0) ||
-        (costs.noc_congestion_cost != 0.0 && old_costs.noc_congestion_cost == 0.0)) {
-        renormalization_needed = true;
-    } else if (cost_diff > costs.noc_congestion_cost * COST_DIFF_TOLERANCE) {
-        renormalization_needed = true;
-    }
-
-    return renormalization_needed;
-}
-
 static void place_constrained_noc_router(ClusterBlockId router_blk_id) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();
@@ -229,9 +185,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     const int N_MOVES_PER_ROUTER = 35000;
     const int N_MOVES = num_router_clusters * N_MOVES_PER_ROUTER;
 
-    const int RENORMALIZATION_LIM = 1024;
-    int renormalization_cnt = 0;
-
     const double starting_prob = 0.5;
     const double prob_step = starting_prob / N_MOVES;
 
@@ -251,9 +204,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
      * Range limit and the probability of accepting swaps with positive delta cost
      * decrease linearly as more swaps are evaluated. Late in the annealing,
      * NoC routers are swapped only with their neighbors as the range limit approaches 1.
-     *
-     * After each RENORMALIZATION_LIM accepted moves, if NoC cost terms have changed
-     * significantly, I update the normalization factors and re-compute the total cost.
      */
 
     // Generate and evaluate router moves
@@ -283,17 +233,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
-
-                renormalization_cnt++;
-                if (renormalization_cnt == RENORMALIZATION_LIM) {
-                    renormalization_cnt = 0;
-                    if (is_renormalization_needed(costs, old_costs)) {
-                        update_noc_normalization_factors(costs);
-                        costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
-                        old_costs = costs;
-                    }
-                }
-
             } else { // The proposed move is rejected
                 revert_move_blocks(blocks_affected);
                 revert_noc_traffic_flow_routes(blocks_affected);

From 3d41245079cfd5ca7d9df7d3ab86fdeb16ecc428 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 30 Jan 2024 11:46:03 -0500
Subject: [PATCH 21/41] Update test_check_noc_placement_costs to test
 congestion

---
 vpr/src/place/noc_place_utils.cpp |  9 ++--
 vpr/test/test_noc_place_utils.cpp | 69 ++++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index fe863e86acd..377730bb110 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -481,11 +481,10 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         }
     }
 
-    // if congestion cost is zero, we use a small cost for calculating the accepted error range
-    double non_zero_congestion_cost = (costs.noc_congestion_cost < MIN_EXPECTED_NOC_CONGESTION_COST) ? MIN_EXPECTED_NOC_CONGESTION_COST : costs.noc_congestion_cost;
-    // check whether the NoC congestion cost is within the error range
-    if (fabs(cost_check.congestion - costs.noc_congestion_cost) > non_zero_congestion_cost){
-        if (!(cost_check.congestion < MIN_EXPECTED_NOC_CONGESTION_COST && costs.noc_congestion_cost < MIN_EXPECTED_NOC_CONGESTION_COST)) {
+    // check the recomputed congestion cost only if it is higher than the minimum expected value
+    if (cost_check.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
+        // check whether the NoC congestion cost is within the error range
+        if (fabs(cost_check.congestion - costs.noc_congestion_cost) > costs.noc_congestion_cost * error_tolerance) {
             VTR_LOG_ERROR(
                 "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
                 cost_check.congestion, costs.noc_congestion_cost);
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 87614fe64b9..1723dfdf200 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -128,7 +128,9 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
         double traffic_flow_bandwidth_usage = (double)dist_2(rand_num_gen);
 
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                  source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                  traffic_flow_bandwidth_usage, traffic_flow_latency, traffic_flow_priority);
 
         number_of_created_traffic_flows++;
 
@@ -309,7 +311,9 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
         int traffic_flow_priority = dist_1(rand_num_gen);
 
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                  source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                  traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
 
         number_of_created_traffic_flows++;
 
@@ -564,7 +568,9 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         int traffic_flow_priority = dist_1(rand_num_gen);
 
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                  source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                  traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
 
         number_of_created_traffic_flows++;
 
@@ -1523,23 +1529,21 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     int router_grid_position_y;
 
     // setting the NoC parameters
-    noc_ctx.noc_model.set_noc_link_latency(1);
-    noc_ctx.noc_model.set_noc_router_latency(1);
-
-    double link_latency = 1;
-    double router_latency = 1;
+    const double link_latency = 1.0;
+    const double router_latency = 1.0;
+    const double link_bandwidth = 1.0;
+    noc_ctx.noc_model.set_noc_link_latency(link_latency);
+    noc_ctx.noc_model.set_noc_router_latency(router_latency);
+    noc_ctx.noc_model.set_noc_link_bandwidth(link_bandwidth);
 
     // noc options used in this test
     // we create these randomly
     t_noc_opts noc_opts;
     noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
     noc_opts.noc_latency_weighting = dist_3(double_engine);
+    noc_opts.noc_congestion_weighting = dist_3(double_engine);
     noc_opts.noc_routing_algorithm = "xy_routing";
 
-    // setting the NoC parameters
-    noc_ctx.noc_model.set_noc_link_latency(1);
-    noc_ctx.noc_model.set_noc_router_latency(1);
-
     // keeps track of which hard router each cluster block is placed
     vtr::vector<ClusterBlockId, NocRouterId> router_where_cluster_is_placed;
 
@@ -1582,6 +1586,11 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         }
     }
 
+    // initialize NoC link bandwidth usage
+    for (auto& noc_link : noc_ctx.noc_model.get_mutable_noc_links()) {
+        noc_link.set_bandwidth_usage(0.0);
+    }
+
     // now we need to create router cluster blocks and passing them to placed at a router hard block as an initial position
     for (int cluster_block_number = 0; cluster_block_number < NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST; cluster_block_number++) {
         // since the indexes for the hard router blocks start from 0, we will just place the router clusters on hard router blocks with the same id //
@@ -1626,7 +1635,9 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         int traffic_flow_priority = dist_1(rand_num_gen);
 
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                  source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                  traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
 
         number_of_created_traffic_flows++;
 
@@ -1643,7 +1654,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
 
     // store the traffic flow routes found
     vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> golden_traffic_flow_routes;
@@ -1657,14 +1668,26 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id;
         int sink_hard_routed_id = (size_t)curr_traffic_flow.sink_router_cluster_id;
 
+        auto& traffic_flow_route = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number];
+        double traffic_flow_bandwidth = curr_traffic_flow.traffic_flow_bandwidth;
+
         // route it
-        routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number], noc_ctx.noc_model);
+        routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, traffic_flow_route, noc_ctx.noc_model);
+
+        // update link bandwidth utilization
+        for (auto link_id : traffic_flow_route) {
+            auto& noc_link = noc_ctx.noc_model.get_single_mutable_noc_link(link_id);
+            double curr_link_bw_util = noc_link.get_bandwidth_usage();
+            curr_link_bw_util += traffic_flow_bandwidth;
+            noc_link.set_bandwidth_usage(curr_link_bw_util);
+        }
     }
 
     // variables below store the expected noc costs (latency and bandwidth)
     t_placer_costs costs;
     costs.noc_aggregate_bandwidth_cost = 0.;
     costs.noc_latency_cost = 0.;
+    costs.noc_congestion_cost = 0.;
 
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
@@ -1672,12 +1695,16 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         double curr_bandwidth_cost = 0.;
         double curr_latency_cost = 0.;
 
+        // get the traffic flow route
+        const auto& golden_traffic_flow_route = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number];
+
         // calculate the bandwidth cost
-        curr_bandwidth_cost = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() * curr_traffic_flow.traffic_flow_bandwidth;
+        curr_bandwidth_cost = golden_traffic_flow_route.size() * curr_traffic_flow.traffic_flow_bandwidth;
         curr_bandwidth_cost *= curr_traffic_flow.traffic_flow_priority;
 
-        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() + 1)) + (link_latency * golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size());
+        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_route.size() + 1)) + (link_latency * golden_traffic_flow_route.size());
 
+        // calculate the latency cost
         curr_latency_cost = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
         curr_latency_cost *= curr_traffic_flow.traffic_flow_priority;
 
@@ -1685,6 +1712,12 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         costs.noc_latency_cost += curr_latency_cost;
     }
 
+    // calculate the congestion cost
+    for (const auto& noc_link : noc_ctx.noc_model.get_noc_links()) {
+        double curr_congestion_cost = noc_opts.noc_congestion_weighting * noc_link.get_congested_bandwidth_ratio();
+        costs.noc_congestion_cost += curr_congestion_cost;
+    }
+
     // this defines the error tolerance that is allowed between the golden noc costs and the costs found by the test function: check_noc_placement_costs
     // we will set it to what the VTR placer uses
     double error_tolerance = .01;
@@ -1712,7 +1745,5 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         // we expect error to be 3 here, meaning the found costs are not within the tolerance range
         REQUIRE(error == 3);
     }
-    // need to delete local noc routing algorithm
-    delete routing_algorithm;
 }
 } // namespace

From 5458ba8cb8415dd4c4fadb4616fee1ccc820a897 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 30 Jan 2024 14:34:01 -0500
Subject: [PATCH 22/41] Update test_initial_noc_placement to check congested
 links

---
 vpr/src/base/read_options.cpp     |  2 +-
 vpr/test/test_noc_place_utils.cpp | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 2a0454612b6..24aacf354ec 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2823,7 +2823,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .help(
             "Controls the importance of reducing the congestion of the NoC links."
             "This value can be >=0, where 0 would mean the congestion has no relevance to placement, a value of 1 would mean the congestion is weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the link congestions.")
-        .default_value("0.05")
+        .default_value("0.00")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_swap_percentage, "--noc_swap_percentage")
diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 1723dfdf200..fbaeb161848 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -39,6 +39,12 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
     // the grid width will be the size of the noc mesh
     noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0);
 
+    // set NoC link bandwidth
+    // dist_2 is used to generate traffic flow bandwidths.
+    // Setting the NoC link bandwidth to max() / 5 makes link congestion more likely to happen
+    const double noc_link_bandwidth = dist_2.max() / 5;
+    noc_ctx.noc_model.set_noc_link_bandwidth(noc_link_bandwidth);
+
     // individual router parameters
     int curr_router_id;
     int router_grid_position_x;
@@ -146,7 +152,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
     noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
 
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
@@ -187,12 +193,13 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
     for (int link_number = 0; link_number < number_of_links; link_number++) {
         NocLinkId current_link_id = (NocLinkId)link_number;
         const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id);
+        double golden_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - noc_link_bandwidth, 0.0);
+        double golden_congested_bw_ratio = golden_congested_bandwidth / noc_link_bandwidth;
 
         REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage());
+        REQUIRE(golden_congested_bandwidth == current_link.get_congested_bandwidth());
+        REQUIRE(golden_congested_bw_ratio == current_link.get_congested_bandwidth_ratio());
     }
-
-    // delete the local routing algorithm
-    delete routing_algorithm;
 }
 TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
     // setup random number generation
@@ -1668,6 +1675,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id;
         int sink_hard_routed_id = (size_t)curr_traffic_flow.sink_router_cluster_id;
 
+        // get the current traffic flow route
         auto& traffic_flow_route = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number];
         double traffic_flow_bandwidth = curr_traffic_flow.traffic_flow_bandwidth;
 

From 5eed8ae18925d764745efcbd92a667fda6e45e9f Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 30 Jan 2024 14:50:55 -0500
Subject: [PATCH 23/41] Update test_initial_comp_cost_functions to check
 congestion cost computation

---
 vpr/test/test_noc_place_utils.cpp | 45 +++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index fbaeb161848..9735cc2e9e2 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -201,6 +201,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") {
         REQUIRE(golden_congested_bw_ratio == current_link.get_congested_bandwidth_ratio());
     }
 }
+
 TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
     // setup random number generation
     std::random_device device;
@@ -226,6 +227,12 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
     // the grid width will be the size of the noc mesh
     noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0);
 
+    // set NoC link bandwidth
+    // dist_2 is used to generate traffic flow bandwidths.
+    // Setting the NoC link bandwidth to max() / 5 makes link congestion more likely to happen
+    const double noc_link_bandwidth = dist_2.max() / 5;
+    noc_ctx.noc_model.set_noc_link_bandwidth(noc_link_bandwidth);
+
     // individual router parameters
     int curr_router_id;
     int router_grid_position_x;
@@ -341,7 +348,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
     noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
 
     // route all the traffic flows locally
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
@@ -393,9 +400,6 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
 
         // release the cost calculator datastructures
         free_noc_placement_structs();
-
-        // need to delete the local routing algorithm
-        delete routing_algorithm;
     }
 
     SECTION("test_comp_noc_latency_cost") {
@@ -427,18 +431,43 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
             golden_total_noc_latency_costs += current_latency_cost;
         }
 
-        // run the test function and get the bandwidth calculated
+        // run the test function and get the latency cost calculated
         double found_latency_cost = comp_noc_latency_cost(noc_opts);
 
-        // compare the test function bandwidth cost to the golden value
+        // compare the test function latency cost to the golden value
         // since we are comparing double numbers we allow a tolerance of difference
         REQUIRE(vtr::isclose(golden_total_noc_latency_costs, found_latency_cost));
 
         // release the cost calculator datastructures
         free_noc_placement_structs();
+    }
 
-        // need to delete the local routing algorithm
-        delete routing_algorithm;
+    SECTION("test_comp_noc_congestion_cost") {
+        //initialize all the cost calculator datastructures
+        allocate_and_load_noc_placement_structs();
+
+        // create the noc options
+        t_noc_opts noc_opts;
+        noc_opts.noc_congestion_weighting = dist_3(double_engine);
+
+        // create local variable to store the congestion cost
+        double golden_total_noc_congestion_costs = 0.;
+
+        for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
+            double congested_bw_ratio = link.get_congested_bandwidth_ratio();
+
+            golden_total_noc_congestion_costs += noc_opts.noc_congestion_weighting * congested_bw_ratio;
+        }
+
+        // run the test function to get the congestion cost
+        double found_congestion_cost = comp_noc_congestion_cost(noc_opts);
+
+        // compare the test function congestion cost to the golden value
+        // since we are comparing double numbers we allow a tolerance of difference
+        REQUIRE(vtr::isclose(golden_total_noc_congestion_costs, found_congestion_cost));
+
+        // release the cost calculator datastructures
+        free_noc_placement_structs();
     }
 }
 

From 304de901bcbe9b74cdd902093ea9122d2bd79723 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 30 Jan 2024 17:23:14 -0500
Subject: [PATCH 24/41] Update
 test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_costs,
 test_recompute_noc_costs to check congestion

---
 vpr/test/test_noc_place_utils.cpp | 54 ++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 9735cc2e9e2..0ac33dacf97 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -507,14 +507,17 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     t_noc_opts noc_opts;
     noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
     noc_opts.noc_latency_weighting = dist_3(double_engine);
+    noc_opts.noc_congestion_weighting = dist_3(double_engine);
 
     // setting the NoC parameters
     noc_ctx.noc_model.set_noc_link_latency(1);
     noc_ctx.noc_model.set_noc_router_latency(1);
     noc_ctx.noc_model.set_noc_link_bandwidth(1);
+
     // needs to be the same as above
     double router_latency = noc_ctx.noc_model.get_noc_router_latency();
     double link_latency = noc_ctx.noc_model.get_noc_link_latency();
+    double link_bandwidth = noc_ctx.noc_model.get_noc_link_bandwidth();
 
     // keeps track of which hard router each cluster block is placed
     vtr::vector<ClusterBlockId, NocRouterId> router_where_cluster_is_placed;
@@ -622,7 +625,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
 
     // store the traffic flow routes found
     vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> golden_traffic_flow_routes;
@@ -632,6 +635,9 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     vtr::vector<NocTrafficFlowId, double> golden_traffic_flow_latency_costs;
     golden_traffic_flow_bandwidth_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
     golden_traffic_flow_latency_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
+    // store link congestion costs
+    vtr::vector<NocLinkId, double> golden_link_congestion_costs;
+    golden_link_congestion_costs.resize(noc_ctx.noc_model.get_number_of_noc_links());
 
     // stores the change in bandwidth and latency costs from the test function
     NocCostTerms test_noc_costs{0.0, 0.0, 0.0};
@@ -680,12 +686,20 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         test_noc_costs.latency += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
     }
 
+    // initialize golden congestion cost for all links
+    for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
+        auto link_id = link.get_link_id();
+        golden_link_congestion_costs[link_id] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link_id] - link_bandwidth, 0.0);
+        test_noc_costs.congestion += golden_link_congestion_costs[link_id];
+    }
+
     // initialize noc placement structs
     allocate_and_load_noc_placement_structs();
 
-    // We need to run these functions as they initialize local variables needed to run the test function within this unit test. we assume thi is correct
+    // We need to run these functions as they initialize local variables needed to run the test function within this unit test. we assume this is correct
     comp_noc_aggregate_bandwidth_cost();
     comp_noc_latency_cost(noc_opts);
+    comp_noc_congestion_cost(noc_opts);
 
     // datastructure that keeps track of moved blocks during placement
     t_pl_blocks_to_be_moved blocks_affected(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST);
@@ -753,6 +767,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and reduce the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
+                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // re-route the traffic flow
@@ -761,6 +776,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and increase the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
+                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // update the costs now
@@ -785,6 +801,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and reduce the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
+                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // re-route the traffic flow
@@ -793,6 +810,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and increase the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
+                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // update the costs now
@@ -813,9 +831,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // call the test function
         find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
 
-        // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
+        // update the test total noc bandwidth, latency, and congestion costs based on the cost changes found by the test functions
         test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
         test_noc_costs.latency += delta_cost.latency;
+        test_noc_costs.congestion += delta_cost.congestion;
 
         // need this function to update the local datastructures that store all the traffic flow costs
         commit_noc_costs();
@@ -890,6 +909,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -898,6 +918,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -918,6 +939,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -926,6 +948,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -946,6 +969,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1008,6 +1032,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -1016,6 +1041,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
+            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -1037,6 +1063,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1100,6 +1127,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
     commit_noc_costs();
@@ -1112,22 +1140,32 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     for (int link_number = 0; link_number < number_of_links; link_number++) {
         NocLinkId current_link_id = (NocLinkId)link_number;
         const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id);
+        double golden_link_bandwidth = golden_link_bandwidths[current_link_id];
+        double golden_link_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - link_bandwidth, 0.0);
+        double golden_link_congested_bandwidth_ratio = golden_link_congested_bandwidth / link_bandwidth;
 
-        REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage());
+        REQUIRE(golden_link_bandwidth == current_link.get_bandwidth_usage());
+        REQUIRE(golden_link_congested_bandwidth == current_link.get_congested_bandwidth());
+        REQUIRE(golden_link_congested_bandwidth_ratio == current_link.get_congested_bandwidth_ratio());
     }
 
-    // now find the total expected noc aggregate bandwidth and latency cost
+    // now find the total expected noc aggregate bandwidth, latency, and congestion cost
     double golden_total_noc_aggr_bandwidth_cost = 0.;
     double golden_total_noc_latency_cost = 0.;
+    double golden_total_noc_congestion_cost = 0.;
 
     for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) {
         golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number];
         golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
     }
 
+    golden_total_noc_congestion_cost = std::accumulate(golden_link_congestion_costs.begin(), golden_link_congestion_costs.end(), 0.0);
+
     // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference)
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
     REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
+    std::cout << golden_total_noc_congestion_cost << " " <<  test_noc_costs.congestion << std::endl;
+    REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion));
 
     // now test the recompute cost function //
     // The recompute cost function just adds up all traffic flow costs, so it match the expected noc costs that we manually calculated above by summing up all the expected individual traffic flow costs. //
@@ -1135,6 +1173,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // start by resetting the test cost variables
     test_noc_costs.aggregate_bandwidth = 0.;
     test_noc_costs.latency = 0.;
+    test_noc_costs.congestion = 0.;
 
     // now execute the test function
     recompute_noc_costs(test_noc_costs);
@@ -1142,13 +1181,12 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // now verify
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
     REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
+    REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion));
 
     // delete local datastructures
     free_noc_placement_structs();
-
-    // need to delete local noc routing algorithm
-    delete routing_algorithm;
 }
+
 TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     // creating local parameters needed for the test
     t_placer_costs costs;

From 0827d9983eab6841e9d9307b72b4aada097bf925 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 30 Jan 2024 18:12:31 -0500
Subject: [PATCH 25/41] Updated
 test_find_affected_noc_routers_and_update_noc_costs to check routes after
 revert

---
 vpr/test/test_noc_place_utils.cpp | 58 +++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 0ac33dacf97..4df3d8e1e7c 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -1195,6 +1195,7 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     SECTION("Test case where the bandwidth cost is 0") {
         costs.noc_aggregate_bandwidth_cost = 0.;
         costs.noc_latency_cost = 1.;
+        costs.noc_congestion_cost = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
@@ -1206,6 +1207,7 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     SECTION("Test case where the latency cost is 0") {
         costs.noc_aggregate_bandwidth_cost = 1.;
         costs.noc_latency_cost = 0.;
+        costs.noc_congestion_cost = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
@@ -1217,6 +1219,7 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     SECTION("Test case where the bandwidth cost is an expected value") {
         costs.noc_aggregate_bandwidth_cost = 1.e9;
         costs.noc_latency_cost = 0.;
+        costs.noc_congestion_cost = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
@@ -1228,6 +1231,7 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     SECTION("Test case where the latency cost is an expected value") {
         costs.noc_aggregate_bandwidth_cost = 1.;
         costs.noc_latency_cost = 50.e-12;
+        costs.noc_congestion_cost = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
@@ -1239,6 +1243,7 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     SECTION("Test case where the latency cost is lower than the smallest expected value") {
         costs.noc_aggregate_bandwidth_cost = 1.;
         costs.noc_latency_cost = 999.e-15;
+        costs.noc_congestion_cost = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
@@ -1247,6 +1252,41 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
         // this should not be trimmed
         REQUIRE(costs.noc_latency_cost_norm == 1.e12);
     }
+    SECTION("Test case where the congestion cost is zero") {
+        costs.noc_aggregate_bandwidth_cost = 1.;
+        costs.noc_latency_cost = 1.;
+        costs.noc_congestion_cost = 0.;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        // this should not be infinite
+        REQUIRE(costs.noc_congestion_cost_norm == 1.e3);
+    }
+    SECTION("Test case where the congestion cost is lower than the smallest expected value") {
+        costs.noc_aggregate_bandwidth_cost = 1.;
+        costs.noc_latency_cost = 1.;
+        costs.noc_congestion_cost = 999.e-15;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        // this should be clamped to the maximum congestion normalization factor
+        REQUIRE(costs.noc_congestion_cost_norm == 1.e3);
+    }
+    SECTION("Test case where the congestion cost is an expected value") {
+        costs.noc_aggregate_bandwidth_cost = 1.;
+        costs.noc_latency_cost = 1.;
+        costs.noc_congestion_cost = 1.e2;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        REQUIRE(costs.noc_congestion_cost_norm == 1.e-2);
+    }
 }
 TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // setup random number generation
@@ -1284,10 +1324,12 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     t_noc_opts noc_opts;
     noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
     noc_opts.noc_latency_weighting = dist_3(double_engine);
+    noc_opts.noc_congestion_weighting = dist_3(double_engine);
 
     // setting the NoC parameters
     noc_ctx.noc_model.set_noc_link_latency(1);
     noc_ctx.noc_model.set_noc_router_latency(1);
+    noc_ctx.noc_model.set_noc_link_bandwidth(1);
 
     // keeps track of which hard router each cluster block is placed
     vtr::vector<ClusterBlockId, NocRouterId> router_where_cluster_is_placed;
@@ -1375,7 +1417,9 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         int traffic_flow_priority = dist_1(rand_num_gen);
 
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                  source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                  traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
 
         number_of_created_traffic_flows++;
 
@@ -1392,7 +1436,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
 
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
 
     // store the traffic flow routes found
     vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> golden_traffic_flow_routes;
@@ -1410,6 +1454,8 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number], noc_ctx.noc_model);
     }
 
+    const vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> initial_golden_traffic_flow_routes = golden_traffic_flow_routes;
+
     // assume this works
     // this is needed to set up the global noc packet router and also global datastructures
     initial_noc_routing();
@@ -1567,9 +1613,15 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id);
 
         REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage());
+
     }
 
-    delete routing_algorithm;
+    for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
+        auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const auto& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
+        const auto& golden_traffic_flow_route = initial_golden_traffic_flow_routes[traffic_flow_id];
+        REQUIRE(traffic_flow_route == golden_traffic_flow_route);
+    }
 }
 TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // setup random number generation

From 48969841e40e890b313ef115ccf6fd5e789e5288 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 31 Jan 2024 12:38:53 -0500
Subject: [PATCH 26/41] Comment some functions and data structures

---
 vpr/src/place/noc_place_utils.cpp | 43 +++++++++++++---
 vpr/src/place/noc_place_utils.h   | 81 ++++++++++++++++++++++++++++---
 vpr/src/place/place_util.h        |  9 ++++
 3 files changed, 118 insertions(+), 15 deletions(-)

diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 377730bb110..f30ef8bf0b9 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -8,8 +8,10 @@ static vtr::vector<NocTrafficFlowId, TrafficFlowPlaceCost> traffic_flow_costs, p
 /* Keeps track of traffic flows that have been updated at each attempted placement move*/
 static std::vector<NocTrafficFlowId> affected_traffic_flows;
 
+/* Proposed and actual congestion cost of a NoC link used for each move assessment */
 static vtr::vector<NocLinkId , double> link_congestion_costs, proposed_link_congestion_costs;
 
+/* Keeps track of NoC links whose bandwidth usage has been updated at each attempted placement move*/
 static std::unordered_set<NocLinkId> affected_noc_links;
 /*********************************************************** *****************************/
 
@@ -23,9 +25,21 @@ static std::unordered_set<NocLinkId> affected_noc_links;
  * False if there are no NoC routers in the netlist or the
  * selected NoC router is fixed/
  */
-static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type);
+static bool select_random_router_cluster(ClusterBlockId& b_from,
+                                         t_pl_loc& from,
+                                         t_logical_block_type_ptr& cluster_from_type);
 
-static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links, std::vector<NocLinkId>& curr_links);
+/**
+ * @brief Given two traffic flow routes, finds links that appear
+ * only in one route.
+ *
+ * @param prev_links Previous route before re-routing the traffic flow (sorted in place)
+ * @param curr_links Current route after re-routing the traffic flow (sorted in place)
+ *
+ * @return Unique links that appear only in one of the given routes
+ */
+static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links,
+                                                                  std::vector<NocLinkId>& curr_links);
 
 void initial_noc_routing(void) {
     // need to update the link usages within after routing all the traffic flows
@@ -43,7 +57,7 @@ void initial_noc_routing(void) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // update the traffic flow route based on where the router cluster blocks are placed
-        std::vector<NocLinkId>& curr_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
+        std::vector<NocLinkId>& curr_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_ctx.noc_model,noc_traffic_flows_storage, *noc_ctx.noc_flows_router);
 
         // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed
         update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
@@ -70,7 +84,9 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
     costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
 }
 
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocCostTerms& delta_c, const t_noc_opts& noc_opts) {
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
+                                                    NocCostTerms& delta_c,
+                                                    const t_noc_opts& noc_opts) {
     VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.);
     VTR_ASSERT_SAFE(delta_c.latency == 0.);
     VTR_ASSERT_SAFE(delta_c.congestion == 0.);
@@ -155,7 +171,10 @@ void commit_noc_costs() {
     return;
 }
 
-std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router) {
+std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id,
+                                           const NocStorage& noc_model,
+                                           NocTrafficFlows& noc_traffic_flows_storage,
+                                           NocRouting& noc_flows_router) {
     // provides the positions where the affected blocks have moved to
     auto& place_ctx = g_vpr_ctx.placement();
 
@@ -193,7 +212,11 @@ void update_traffic_flow_link_usage(const std::vector<NocLinkId>& traffic_flow_r
     return;
 }
 
-void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows) {
+void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id,
+                                       NocTrafficFlows& noc_traffic_flows_storage,
+                                       NocStorage& noc_model,
+                                       NocRouting& noc_flows_router,
+                                       std::unordered_set<NocTrafficFlowId>& updated_traffic_flows) {
     // get all the associated traffic flows for the logical router cluster block
     const std::vector<NocTrafficFlowId>* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id);
 
@@ -272,7 +295,10 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
     return;
 }
 
-void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router) {
+void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id,
+                           NocTrafficFlows& noc_traffic_flows_storage,
+                           NocStorage& noc_model,
+                           NocRouting& noc_flows_router) {
     // get the current traffic flow info
     const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
@@ -798,7 +824,8 @@ void write_noc_placement_file(const std::string& file_name) {
     return;
 }
 
-static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links, std::vector<NocLinkId>& curr_links) {
+static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links,
+                                                                  std::vector<NocLinkId>& curr_links) {
     // Sort both link containers
     std::sort(prev_links.begin(), prev_links.end());
     std::sort(curr_links.begin(), curr_links.end());
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index dd97f7d1bc6..4a13b2d67c9 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -107,7 +107,9 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
  * NoC latency cost caused by a placer move is stored
  * here.
  */
-void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, NocCostTerms& delta_c, const t_noc_opts& noc_opts);
+void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
+                                                    NocCostTerms& delta_c,
+                                                    const t_noc_opts& noc_opts);
 
 /**
  * @brief Updates static datastructures found in 'noc_place_utils.cpp'
@@ -154,7 +156,10 @@ void commit_noc_costs();
  * flows within the NoC.
  * @return std::vector<NocLinkId>& The found route for the traffic flow.
  */
-std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router);
+std::vector<NocLinkId>& route_traffic_flow(NocTrafficFlowId traffic_flow_id,
+                                           const NocStorage& noc_model,
+                                           NocTrafficFlows& noc_traffic_flows_storage,
+                                           NocRouting& noc_flows_router);
 
 /**
  * @brief Updates the bandwidth usages of links found in a routed traffic flow.
@@ -202,7 +207,10 @@ void update_traffic_flow_link_usage(const std::vector<NocLinkId>& traffic_flow_r
  * @param updated_traffic_flows Keeps track of traffic flows that have been
  * re-routed. Used to prevent re-routing the same traffic flow multiple times.
  */
-void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, std::unordered_set<NocTrafficFlowId>& updated_traffic_flows);
+void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id,
+                                       NocTrafficFlows& noc_traffic_flows_storage,
+                                       NocStorage& noc_model, NocRouting& noc_flows_router,
+                                       std::unordered_set<NocTrafficFlowId>& updated_traffic_flows);
 
 /**
  * @brief Used to re-route all the traffic flows associated to logical
@@ -233,7 +241,10 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
  * @param noc_flows_router The packet routing algorithm used to route traffic
  * flows within the NoC.
  */
-void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router);
+void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id,
+                           NocTrafficFlows& noc_traffic_flows_storage,
+                           NocStorage& noc_model,
+                           NocRouting& noc_flows_router);
 
 /**
  * @brief Recompute the NoC costs (aggregate bandwidth and latency) by
@@ -369,13 +380,44 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
  * @param traffic_flow_info Contains the traffic flow priority.
  * @param noc_opts Contains the user provided weightings of the traffic flow 
  * latency and its constraint parameters for the cost calculation.
- * @return THe computed latency for the provided traffic flow
+ * @return The computed latency for the provided traffic flow
  */
-double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts);
+double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
+                                           const NocStorage& noc_model,
+                                           const t_noc_traffic_flow& traffic_flow_info,
+                                           const t_noc_opts& noc_opts);
 
+/**
+ * @brief Determines the congestion cost of a NoC link. The cost
+ * is calculated by measuring how much the current bandwidth
+ * going through the link exceeds the link's bandwidth capacity.
+ *
+ * @param link The NoC link for which the congestion cost is
+ * to be computed
+ * @param noc_opts Contains the user provided weighting factor to
+ * specify the importance of congestion costs compared to other
+ * NoC-related cost terms.
+ * @return The computed congestion cost for the given NoC link.
+ */
 double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts);
 
-double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts);
+/**
+ * @brief Computes a weighted average of NoC cost terms to determine
+ * NoC's contribution to the total placement cost.
+ *
+ * @param cost_terms Different NoC-related cost terms.
+ * @param norm_factors Normalization factors used to scale
+ * different NoC-related cost terms so that they have similar
+ * ranges.
+ * @param noc_opts Contains noc_placement_weighting factor
+ * to specify the contribution of NoC-related cost to the
+ * total placement cost.
+ * @return  The computed total NoC-related contribution to the
+ * total placement cost.
+ */
+double calculate_noc_cost(const NocCostTerms& cost_terms,
+                          const t_placer_costs& norm_factors,
+                          const t_noc_opts& noc_opts);
 
 /**
  * @brief Goes through all the traffic flows and determines whether the
@@ -385,12 +427,37 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs&
  */
 int get_number_of_traffic_flows_with_latency_cons_met(void);
 
+/**
+ * @brief Goes through all NoC links and counts the congested ones.
+ *
+ * @return The total number of congested NoC links.
+ */
 int get_number_of_congested_noc_links(void);
 
+/**
+ * @brief Goes through all NoC links and determines whether they
+ * are congested or not. Then adds up the congestion ratio of all
+ * congested links.
+ *
+ * @return The total congestion ratio
+ */
 double get_total_congestion_bandwidth_ratio(void);
 
+/**
+ * @brief Goes through all NoC links and determines whether they
+ * are congested or not. Then finds n links that are most congested.
+ *
+ * @return n links with highest congestion ratio
+ */
 std::vector<NocLink> get_top_n_congested_links(int n);
 
+
+/**
+ * @brief Goes through all NoC links and determines whether they
+ * are congested or not. Then finds the n highest congestion ratios.
+ *
+ * @return n highest congestion ratios
+ */
 std::vector<double> get_top_n_congestion_ratios(int n);
 
 /**
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index b70c84c3643..2e1355f121d 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -16,6 +16,15 @@
 // in NocCostTerms constructor
 class t_placer_costs;
 
+/**
+ * @brief Data structure that stores different cost terms for NoC placement.
+ *
+ *   @param aggregate_bandwidth The total bandwidth used in the NoC.
+ *   @param latency A weighted average between aggregate latency and
+ *   latency overruns.
+ *   @param congestion The sum of congestion divided by available bandwidth
+ *   over all NoC links.
+ */
 struct NocCostTerms {
   public:
     NocCostTerms() = delete;

From 6bede42ab59743ad5e47bbeca838a87f69844c52 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 Feb 2024 15:25:09 -0500
Subject: [PATCH 27/41] Separate NoC cost computation and normalization

---
 vpr/src/place/initial_noc_placement.cpp |  14 +--
 vpr/src/place/noc_place_utils.cpp       | 132 +++++++++++--------
 vpr/src/place/noc_place_utils.h         |  26 ++--
 vpr/src/place/place.cpp                 | 161 ++++++++++++------------
 vpr/src/place/place_util.cpp            |  28 +++--
 vpr/src/place/place_util.h              |  20 +--
 6 files changed, 207 insertions(+), 174 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 10899b24ba5..27424e67d6a 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -158,11 +158,11 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     t_placer_costs costs;
 
     // Initialize NoC-related costs
-    costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
-    costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
-    costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
+    costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
+    std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost();
+    costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
     update_noc_normalization_factors(costs);
-    costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
+    costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
 
     // Maximum distance in each direction that a router can travel in a move
     // It is assumed that NoC routers are organized in a square grid.
@@ -214,9 +214,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
         if (create_move_outcome != e_create_move::ABORT) {
             apply_move_blocks(blocks_affected);
 
-            NocCostTerms noc_delta_c {0.0, 0.0, 0.0};
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
-            double delta_cost = calculate_noc_cost(noc_delta_c, costs, noc_opts);
+            NocCostTerms noc_delta_c;
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c);
+            double delta_cost = calculate_noc_cost(noc_delta_c, costs.noc_cost_norm_factors, noc_opts);
 
             double prob = starting_prob - i_move * prob_step;
             bool move_accepted = accept_noc_swap(delta_cost, prob);
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index f30ef8bf0b9..a806b0b9ea1 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -79,16 +79,16 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
     initial_noc_routing();
 
     // Initialize traffic_flow_costs
-    costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
-    costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
-    costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
+    costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
+    std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost();
+    costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
 }
 
 void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
-                                                    NocCostTerms& delta_c,
-                                                    const t_noc_opts& noc_opts) {
+                                                    NocCostTerms& delta_c) {
     VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.);
     VTR_ASSERT_SAFE(delta_c.latency == 0.);
+    VTR_ASSERT(delta_c.latency_overrun == 0.);
     VTR_ASSERT_SAFE(delta_c.congestion == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
@@ -122,11 +122,13 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
 
         // calculate the new aggregate bandwidth and latency costs for the affected traffic flow
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
-        proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
+        std::tie(proposed_traffic_flow_costs[traffic_flow_id].latency,
+                 proposed_traffic_flow_costs[traffic_flow_id].latency_overrun) = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow);
 
         // compute how much the aggregate bandwidth and latency costs change with this swap
         delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
         delta_c.latency += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
+        delta_c.latency_overrun += proposed_traffic_flow_costs[traffic_flow_id].latency_overrun - traffic_flow_costs[traffic_flow_id].latency_overrun;
     }
 
     // Iterate over all affected links and calculate their new congestion cost and store it
@@ -135,7 +137,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
         const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
 
         // calculate the new congestion cost for the link and store it
-        proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
+        proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link);
 
         // compute how much the congestion cost changes with this swap
         delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link];
@@ -154,6 +156,7 @@ void commit_noc_costs() {
         // reset the proposed traffic flows costs
         proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = INVALID_NOC_COST_TERM;
         proposed_traffic_flow_costs[traffic_flow_id].latency = INVALID_NOC_COST_TERM;
+        proposed_traffic_flow_costs[traffic_flow_id].latency_overrun = INVALID_NOC_COST_TERM;
     }
 
     // Iterate over all the NoC links whose bandwidth utilization was affected by the proposed move
@@ -321,12 +324,13 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
     auto& noc_ctx = g_vpr_ctx.noc();
 
     // reset the cost variables first
-    new_cost = NocCostTerms{0.0, 0.0, 0.0};
+    new_cost = NocCostTerms{0.0, 0.0, 0.0, 0.0};
 
     // go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC
     for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) {
         new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
         new_cost.latency += traffic_flow_costs[traffic_flow_id].latency;
+        new_cost.latency_overrun += traffic_flow_costs[traffic_flow_id].latency_overrun;
     }
 
     // Iterate over all NoC links and accumulate their congestion costs
@@ -339,14 +343,23 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
 
 void update_noc_normalization_factors(t_placer_costs& costs) {
     //Prevent the norm factors from going to infinity
-    costs.noc_aggregate_bandwidth_cost_norm = std::min(1 / costs.noc_aggregate_bandwidth_cost, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
-    costs.noc_latency_cost_norm = std::min(1 / costs.noc_latency_cost, MAX_INV_NOC_LATENCY_COST);
+    costs.noc_cost_norm_factors.aggregate_bandwidth = std::min(1 / costs.noc_cost_terms.aggregate_bandwidth, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
+    costs.noc_cost_norm_factors.latency = std::min(1 / costs.noc_cost_terms.latency, MAX_INV_NOC_LATENCY_COST);
 
-    // to avoid division by zero
-    if (costs.noc_congestion_cost > 0.0) {
-        costs.noc_congestion_cost_norm = std::min(1 / costs.noc_congestion_cost, MAX_INV_NOC_CONGESTION_COST);
+    // to avoid division by zero and negative numbers
+    // latency overrun cost may take very small negative values due to round-off error
+    if (costs.noc_cost_terms.latency_overrun > 0.0) {
+        costs.noc_cost_norm_factors.latency_overrun = std::min(1 / costs.noc_cost_terms.latency_overrun, MAX_INV_NOC_LATENCY_COST);
     } else {
-        costs.noc_congestion_cost_norm = MAX_INV_NOC_CONGESTION_COST;
+        costs.noc_cost_norm_factors.latency_overrun = MAX_INV_NOC_LATENCY_COST;
+    }
+
+    // to avoid division by zero and negative numbers
+    // congestion cost may take very small negative values due to round-off error
+    if (costs.noc_cost_terms.congestion > 0.0) {
+        costs.noc_cost_norm_factors.congestion = std::min(1 / costs.noc_cost_terms.congestion, MAX_INV_NOC_CONGESTION_COST);
+    } else {
+        costs.noc_cost_norm_factors.congestion = MAX_INV_NOC_CONGESTION_COST;
     }
 
     return;
@@ -378,13 +391,13 @@ double comp_noc_aggregate_bandwidth_cost(void) {
     return noc_aggregate_bandwidth_cost;
 }
 
-double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
+std::pair<double, double> comp_noc_latency_cost() {
     // used to get traffic flow route information
     auto& noc_ctx = g_vpr_ctx.noc();
     // datastructure that stores all the traffic flow routes
     const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
 
-    double noc_latency_cost = 0.;
+    std::pair<double, double> noc_latency_cost_terms{0.0, 0.0};
 
     // now go through each traffic flow route and calculate its
     // latency. Then store this in local data structures and accumulate it.
@@ -392,19 +405,21 @@ double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
         const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
         const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
-        double curr_traffic_flow_latency_cost = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
+        auto [curr_traffic_flow_latency, curr_traffic_flow_latency_overrun] = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow);
 
-        // store the calculated latency for the current traffic flow in local datastructures (this also initializes them)
-        traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency_cost;
+        // store the calculated latency cost terms for the current traffic flow in local datastructures (this also initializes them)
+        traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency;
+        traffic_flow_costs[traffic_flow_id].latency_overrun = curr_traffic_flow_latency_overrun;
 
-        // accumulate the latency cost
-        noc_latency_cost += curr_traffic_flow_latency_cost;
+        // accumulate the latency cost terms
+        noc_latency_cost_terms.first += curr_traffic_flow_latency;
+        noc_latency_cost_terms.second += curr_traffic_flow_latency_overrun;
     }
 
-    return noc_latency_cost;
+    return noc_latency_cost_terms;
 }
 
-double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
+double comp_noc_congestion_cost() {
     // Used to access NoC links
     auto& noc_ctx = g_vpr_ctx.noc();
 
@@ -412,7 +427,7 @@ double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
 
     // Iterate over all NoC links
     for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
-        double link_congestion_cost = calculate_link_congestion_cost(link, noc_opts);
+        double link_congestion_cost = calculate_link_congestion_cost(link);
 
         // store the congestion cost for this link in static data structures (this also initializes them)
         link_congestion_costs[link] = link_congestion_cost;
@@ -426,7 +441,7 @@ double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
 
 int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
     int error = 0;
-    NocCostTerms cost_check{0.0, 0.0, 0.0};
+    NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0};
 
     // get current router block locations
     auto& place_ctx = g_vpr_ctx.placement();
@@ -468,8 +483,9 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         double current_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(temp_found_noc_route, curr_traffic_flow);
         cost_check.aggregate_bandwidth += current_flow_aggregate_bandwidth_cost;
 
-        double current_flow_latency_cost = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow, noc_opts);
-        cost_check.latency += current_flow_latency_cost;
+        auto [curr_traffic_flow_latency_cost, curr_traffic_flow_latency_overrun_cost] = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow);
+        cost_check.latency += curr_traffic_flow_latency_cost;
+        cost_check.latency_overrun += curr_traffic_flow_latency_overrun_cost;
 
         // increase bandwidth utilization for the links that constitute the current flow's route
         for (auto& link_id : temp_found_noc_route) {
@@ -485,24 +501,35 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
 
     // Iterate over all NoC links and accumulate congestion cost
     for(const auto& link : temp_noc_link_storage) {
-        cost_check.congestion += calculate_link_congestion_cost(link, noc_opts);
+        cost_check.congestion += calculate_link_congestion_cost(link);
     }
 
     // check whether the aggregate bandwidth placement cost is within the error tolerance
-    if (fabs(cost_check.aggregate_bandwidth - costs.noc_aggregate_bandwidth_cost) > costs.noc_aggregate_bandwidth_cost * error_tolerance) {
+    if (fabs(cost_check.aggregate_bandwidth - costs.noc_cost_terms.aggregate_bandwidth) > costs.noc_cost_terms.aggregate_bandwidth * error_tolerance) {
         VTR_LOG_ERROR(
             "noc_aggregate_bandwidth_cost_check: %g and noc_aggregate_bandwidth_cost: %g differ in check_noc_placement_costs.\n",
-            cost_check.aggregate_bandwidth, costs.noc_aggregate_bandwidth_cost);
+            cost_check.aggregate_bandwidth, costs.noc_cost_terms.aggregate_bandwidth);
         error++;
     }
 
     // only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
     if (cost_check.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
         // check whether the latency placement cost is within the error tolerance
-        if (fabs(cost_check.latency - costs.noc_latency_cost) > costs.noc_latency_cost * error_tolerance) {
+        if (fabs(cost_check.latency - costs.noc_cost_terms.latency) > costs.noc_cost_terms.latency * error_tolerance) {
             VTR_LOG_ERROR(
                 "noc_latency_cost_check: %g and noc_latency_cost: %g differ in check_noc_placement_costs.\n",
-                cost_check.latency, costs.noc_latency_cost);
+                cost_check.latency, costs.noc_cost_terms.latency);
+            error++;
+        }
+    }
+
+    // only check the recomputed latency overrun cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
+    if (cost_check.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
+        // check whether the latency overrun placement cost is within the error tolerance
+        if (fabs(cost_check.latency_overrun - costs.noc_cost_terms.latency_overrun) > costs.noc_cost_terms.latency_overrun * error_tolerance) {
+            VTR_LOG_ERROR(
+                "noc_latency_overrun_cost_check: %g and noc_latency_overrun_cost: %g differ in check_noc_placement_costs.\n",
+                cost_check.latency_overrun, costs.noc_cost_terms.latency_overrun);
             error++;
         }
     }
@@ -510,10 +537,10 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
     // check the recomputed congestion cost only if it is higher than the minimum expected value
     if (cost_check.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
         // check whether the NoC congestion cost is within the error range
-        if (fabs(cost_check.congestion - costs.noc_congestion_cost) > costs.noc_congestion_cost * error_tolerance) {
+        if (fabs(cost_check.congestion - costs.noc_cost_terms.congestion) > costs.noc_cost_terms.congestion * error_tolerance) {
             VTR_LOG_ERROR(
                 "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
-                cost_check.congestion, costs.noc_congestion_cost);
+                cost_check.congestion, costs.noc_cost_terms.congestion);
             error++;
         }
     }
@@ -528,7 +555,9 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
     return (traffic_flow_info.traffic_flow_priority * traffic_flow_info.traffic_flow_bandwidth * num_of_links_in_traffic_flow);
 }
 
-double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts) {
+std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
+                                                              const NocStorage& noc_model,
+                                                              const t_noc_traffic_flow& traffic_flow_info) {
     // there will always be one more router than links in a traffic flow
     int num_of_links_in_traffic_flow = traffic_flow_route.size();
     int num_of_routers_in_traffic_flow = num_of_links_in_traffic_flow + 1;
@@ -538,26 +567,28 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
     double noc_link_latency = noc_model.get_noc_link_latency();
     double noc_router_latency = noc_model.get_noc_router_latency();
 
-    // calculate the traffic flow_latency
+    // calculate the traffic flow latency
     double latency = (noc_link_latency * num_of_links_in_traffic_flow) + (noc_router_latency * num_of_routers_in_traffic_flow);
 
-    // calculate the cost
-    double single_traffic_flow_latency_cost = (noc_opts.noc_latency_constraints_weighting * std::max(0., latency - max_latency)) + (noc_opts.noc_latency_weighting * latency);
+    // calculate the traffic flow latency overrun
+    double latency_overrun = std::max(latency - max_latency, 0.);
 
     // scale the latency cost by its priority to indicate its importance
-    return (single_traffic_flow_latency_cost * traffic_flow_info.traffic_flow_priority);
-}
+    latency *= traffic_flow_info.traffic_flow_priority;
+    latency_overrun *= traffic_flow_info.traffic_flow_priority;
 
-double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts) {
-    double congested_bw_ratio, congestion_cost;
+    return {latency, latency_overrun};
+}
 
-    congested_bw_ratio = link.get_congested_bandwidth_ratio();
-    congestion_cost = noc_opts.noc_congestion_weighting * congested_bw_ratio;
+double calculate_link_congestion_cost(const NocLink& link) {
+    double congested_bw_ratio = link.get_congested_bandwidth_ratio();
 
-    return congestion_cost;
+    return congested_bw_ratio;
 }
 
-double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts) {
+double calculate_noc_cost(const NocCostTerms& cost_terms,
+                          const NocCostTerms& norm_factors,
+                          const t_noc_opts& noc_opts) {
     double cost = 0.0;
 
     /* NoC's contribution to the placement cost is a weighted sum over:
@@ -566,9 +597,10 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs&
      * 3) Link congestion costs
      */
     cost = noc_opts.noc_placement_weighting * (
-               cost_terms.latency * norm_factors.noc_latency_cost_norm +
-               cost_terms.aggregate_bandwidth * norm_factors.noc_aggregate_bandwidth_cost_norm +
-               cost_terms.congestion * norm_factors.noc_congestion_cost_norm);
+               cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth +
+               cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_weighting +
+               cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting +
+               cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting);
 
     return cost;
 }
@@ -692,7 +724,7 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) {
      * we now only swap router blocks for the percentage of time the user
      * supplied.
      * */
-    return (vtr::irand(99) < user_supplied_noc_router_swap_percentage) ? true : false;
+    return (vtr::irand(99) < user_supplied_noc_router_swap_percentage);
 }
 
 static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type) {
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index 4a13b2d67c9..58f041795dd 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -41,6 +41,7 @@ constexpr double INVALID_NOC_COST_TERM = -1.0;
 struct TrafficFlowPlaceCost {
     double aggregate_bandwidth = INVALID_NOC_COST_TERM;
     double latency = INVALID_NOC_COST_TERM;
+    double latency_overrun = INVALID_NOC_COST_TERM;
 };
 
 /**
@@ -108,8 +109,7 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
  * here.
  */
 void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
-                                                    NocCostTerms& delta_c,
-                                                    const t_noc_opts& noc_opts);
+                                                    NocCostTerms& delta_c);
 
 /**
  * @brief Updates static datastructures found in 'noc_place_utils.cpp'
@@ -313,9 +313,9 @@ double comp_noc_aggregate_bandwidth_cost(void);
  * 
  * @return double The latency cost of the NoC.
  */
-double comp_noc_latency_cost(const t_noc_opts& noc_opts);
+std::pair<double, double> comp_noc_latency_cost();
 
-double comp_noc_congestion_cost(const t_noc_opts& noc_opts);
+double comp_noc_congestion_cost();
 
 /**
  * @brief Given a placement state the NoC costs are re-computed
@@ -378,14 +378,11 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
  * @param noc_model Contains noc information such as the router and link
  * latencies.
  * @param traffic_flow_info Contains the traffic flow priority.
- * @param noc_opts Contains the user provided weightings of the traffic flow 
- * latency and its constraint parameters for the cost calculation.
- * @return The computed latency for the provided traffic flow
+ * @return A pair holding the priority-scaled latency (first) and the priority-scaled latency overrun (second) of the given traffic flow.
  */
-double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
-                                           const NocStorage& noc_model,
-                                           const t_noc_traffic_flow& traffic_flow_info,
-                                           const t_noc_opts& noc_opts);
+std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
+                                                              const NocStorage& noc_model,
+                                                              const t_noc_traffic_flow& traffic_flow_info);
 
 /**
  * @brief Determines the congestion cost a NoC link. The cost
@@ -394,12 +391,9 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
  *
  * @param link The NoC link for which the congestion cost is
  * to be computed
- * @param noc_opts Contains the user provided weighting factor to
- * specify the importance of congestion costs compared to other
- * NoC-related cost terms.
  * @return The computed congestion cost for the given NoC link.
  */
-double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts);
+double calculate_link_congestion_cost(const NocLink& link);
 
 /**
  * @brief Computes a weighted average of NoC cost term to determine
@@ -416,7 +410,7 @@ double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc
  * total placement cost.
  */
 double calculate_noc_cost(const NocCostTerms& cost_terms,
-                          const t_placer_costs& norm_factors,
+                          const NocCostTerms& norm_factors,
                           const t_noc_opts& noc_opts);
 
 /**
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 1f37581f7af..8eed5b14518 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -15,6 +15,7 @@
 #include "vtr_random.h"
 #include "vtr_geometry.h"
 #include "vtr_time.h"
+#include "vtr_math.h"
 
 #include "vpr_types.h"
 #include "vpr_error.h"
@@ -536,9 +537,7 @@ static void print_place_status(const t_annealing_state& state,
                                float sWNS,
                                size_t tot_moves,
                                bool noc_enabled,
-                               float noc_agg_bw,
-                               float noc_agg_latency,
-                               float noc_cong);
+                               const NocCostTerms& noc_cost_terms);
 
 static void print_resources_utilization();
 
@@ -802,9 +801,9 @@ void try_place(const Netlist<>& net_list,
 
     if (noc_opts.noc) {
         // get the costs associated with the NoC
-        costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
-        costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
-        costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
+        costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
+        std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost();
+        costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
 
         // initialize all the noc normalization factors
         update_noc_normalization_factors(costs);
@@ -832,11 +831,11 @@ void try_place(const Netlist<>& net_list,
             "noc_congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
-            calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
-            costs.noc_aggregate_bandwidth_cost,
-            costs.noc_latency_cost,
+            calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
+            costs.noc_cost_terms.aggregate_bandwidth,
+            costs.noc_cost_terms.latency,
             get_number_of_traffic_flows_with_latency_cons_met(),
-            costs.noc_congestion_cost,
+            costs.noc_cost_terms.congestion,
             get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
     }
@@ -880,11 +879,11 @@ void try_place(const Netlist<>& net_list,
                 "noc_congestion_cost: %g "
                 "accum_congested_ratio: %g, "
                 "n_congested_links: %d",
-                calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
-                costs.noc_aggregate_bandwidth_cost,
-                costs.noc_latency_cost,
+                calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
+                costs.noc_cost_terms.aggregate_bandwidth,
+                costs.noc_cost_terms.latency,
                 get_number_of_traffic_flows_with_latency_cons_met(),
-                costs.noc_congestion_cost,
+                costs.noc_cost_terms.congestion,
                 get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
     }
@@ -1033,8 +1032,7 @@ void try_place(const Netlist<>& net_list,
 
             print_place_status(state, stats, temperature_timer.elapsed_sec(),
                                critical_path.delay(), sTNS, sWNS, tot_iter,
-                               noc_opts.noc, costs.noc_aggregate_bandwidth_cost,
-                               costs.noc_latency_cost, costs.noc_congestion_cost);
+                               noc_opts.noc, costs.noc_cost_terms);
 
             if (placer_opts.place_algorithm.is_timing_driven()
                 && placer_opts.place_agent_multistate
@@ -1106,8 +1104,7 @@ void try_place(const Netlist<>& net_list,
 
         print_place_status(state, stats, temperature_timer.elapsed_sec(),
                            critical_path.delay(), sTNS, sWNS, tot_iter,
-                           noc_opts.noc, costs.noc_aggregate_bandwidth_cost,
-                           costs.noc_latency_cost, costs.noc_congestion_cost);
+                           noc_opts.noc, costs.noc_cost_terms);
     }
     auto post_quench_timing_stats = timing_ctx.stats;
 
@@ -1201,33 +1198,33 @@ void try_place(const Netlist<>& net_list,
         sprintf(msg,
                 "\nNoC Placement Costs. "
                 "noc cost: %g, "
-                "noc_aggregate_bandwidth_cost: %g "
-                "noc_latency_cost: %g "
-                "noc_latency_constraints_cost: %d "
-                "noc_congestion_cost: %g "
+                "noc_aggregate_bandwidth_cost: %g, "
+                "noc_latency_cost: %g, "
+                "noc_latency_constraints_cost: %d, "
+                "noc_congestion_cost: %g, "
                 "accum_congested_ratio: %g, "
-                "n_congested_links: %d",
-                calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
-                costs.noc_aggregate_bandwidth_cost,
-                costs.noc_latency_cost,
+                "n_congested_links: %d \n",
+                calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
+                costs.noc_cost_terms.aggregate_bandwidth,
+                costs.noc_cost_terms.latency,
                 get_number_of_traffic_flows_with_latency_cons_met(),
-                costs.noc_congestion_cost,
+                costs.noc_cost_terms.congestion,
                 get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
 
         VTR_LOG("\nNoC Placement Costs. "
             "noc cost: %g, "
-            "noc_aggregate_bandwidth_cost: %g "
+            "noc_aggregate_bandwidth_cost: %g, "
             "noc_latency_cost: %g, "
             "noc_latency_constraints_cost: %d, "
             "noc_congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
-            calculate_noc_cost(NocCostTerms(costs), costs, noc_opts),
-            costs.noc_aggregate_bandwidth_cost,
-            costs.noc_latency_cost,
+            calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
+            costs.noc_cost_terms.aggregate_bandwidth,
+            costs.noc_cost_terms.latency,
             get_number_of_traffic_flows_with_latency_cons_met(),
-            costs.noc_congestion_cost,
+            costs.noc_cost_terms.congestion,
             get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
     }
@@ -1414,63 +1411,63 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const PlaceDelayModel* delay_model,
                                          const PlacerCriticalities* criticalities,
                                          t_placer_costs* costs) {
+    auto check_and_print_cost = [](double new_cost,
+                                   double old_cost,
+                                   const std::string& cost_name) {
+        if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) {
+            std::string msg = vtr::string_fmt(
+                "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n",
+                cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL);
+            VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
+        }
+    };
+
     double new_bb_cost = recompute_bb_cost();
-    if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) {
-        std::string msg = vtr::string_fmt(
-            "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n",
-            new_bb_cost, costs->bb_cost);
-        VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
-    }
+    check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost");
     costs->bb_cost = new_bb_cost;
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         double new_timing_cost = 0.;
         comp_td_costs(delay_model, *criticalities, &new_timing_cost);
-        if (fabs(
-                new_timing_cost
-                - costs->timing_cost)
-            > costs->timing_cost * ERROR_TOL) {
-            std::string msg = vtr::string_fmt(
-                "in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n",
-                new_timing_cost, costs->timing_cost, ERROR_TOL);
-            VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
-        }
+        check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost");
         costs->timing_cost = new_timing_cost;
     } else {
         VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
-
         costs->cost = new_bb_cost * costs->bb_cost_norm;
     }
 
     if (noc_opts.noc) {
-        NocCostTerms new_noc_cost{0.0, 0.0, 0.0};
+        NocCostTerms new_noc_cost;
         recompute_noc_costs(new_noc_cost);
 
-        if (fabs(
-                new_noc_cost.aggregate_bandwidth
-                - costs->noc_aggregate_bandwidth_cost)
-            > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) {
-            std::string msg = vtr::string_fmt(
-                "in recompute_costs_from_scratch: new_noc_cost.aggregate_bandwidth = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n",
-                new_noc_cost.aggregate_bandwidth, costs->noc_aggregate_bandwidth_cost, ERROR_TOL);
-            VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
-        }
-        costs->noc_aggregate_bandwidth_cost = new_noc_cost.aggregate_bandwidth;
+        check_and_print_cost(new_noc_cost.aggregate_bandwidth,
+                             costs->noc_cost_terms.aggregate_bandwidth,
+                             "noc_aggregate_bandwidth");
+        costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth;
 
         // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond.
         // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond)
         if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
-            if (fabs(
-                    new_noc_cost.latency
-                    - costs->noc_latency_cost)
-                > costs->noc_latency_cost * ERROR_TOL) {
-                std::string msg = vtr::string_fmt(
-                    "in recompute_costs_from_scratch: new_noc_cost.latency = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n",
-                    new_noc_cost.latency, costs->noc_latency_cost, ERROR_TOL);
-                VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
-            }
+            check_and_print_cost(new_noc_cost.latency,
+                                 costs->noc_cost_terms.latency,
+                                 "noc_latency_cost");
+        }
+        costs->noc_cost_terms.latency = new_noc_cost.latency;
+
+        if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
+            check_and_print_cost(new_noc_cost.latency_overrun,
+                                 costs->noc_cost_terms.latency_overrun,
+                                 "noc_latency_overrun_cost");
         }
-        costs->noc_latency_cost = new_noc_cost.latency;
+        costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun;
+
+        if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
+            check_and_print_cost(new_noc_cost.congestion,
+                                 costs->noc_cost_terms.congestion,
+                                 "noc_congestion_cost");
+        }
+        costs->noc_cost_terms.congestion = new_noc_cost.congestion;
+
     }
 }
 
@@ -1808,13 +1805,13 @@ static e_move_result try_swap(const t_annealing_state* state,
         }
 
 
-        NocCostTerms noc_delta_c {0.0, 0.0, 0.0}; // change in NoC cost
+        NocCostTerms noc_delta_c; // change in NoC cost
         /* Update the NoC datastructure and costs*/
         if (noc_opts.noc) {
-            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
+            find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c);
 
             // Include the NoC delta costs in the total cost change for this swap
-            delta_c += calculate_noc_cost(noc_delta_c, *costs, noc_opts);
+            delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts);
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
@@ -2335,7 +2332,7 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
 
     if (noc_opts.noc) {
         // in noc mode we include noc aggregate bandwidth and noc latency
-        total_cost += calculate_noc_cost(NocCostTerms(*costs), *costs, noc_opts);
+        total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts);
     }
 
     return total_cost;
@@ -4227,13 +4224,13 @@ static void print_place_status_header(bool noc_enabled) {
             "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
     } else {
         VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- ---------\n");
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n");
         VTR_LOG(
-            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha Agg. BW  Agg. Lat NoC Cong.\n");
+            "Tnum   Time       T Av Cost Av BB Cost Av TD Cost     CPD       sTNS     sWNS Ac Rate Std Dev  R lim Crit Exp Tot Moves  Alpha Agg. BW  Agg. Lat Lat Over. NoC Cong.\n");
         VTR_LOG(
-            "      (sec)                                          (ns)       (ns)     (ns)                                                   (bps)     (ns)            \n");
+            "      (sec)                                          (ns)       (ns)     (ns)                                                   (bps)     (ns)     (ns)             \n");
         VTR_LOG(
-            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- ---------\n");
+            "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n");
     }
 
 }
@@ -4246,9 +4243,7 @@ static void print_place_status(const t_annealing_state& state,
                                float sWNS,
                                size_t tot_moves,
                                bool noc_enabled,
-                               float noc_agg_bw,
-                               float noc_agg_latency,
-                               float noc_cong) {
+                               const NocCostTerms& noc_cost_terms) {
     VTR_LOG(
         "%4zu %6.1f %7.1e "
         "%7.3f %10.2f %-10.5g "
@@ -4265,8 +4260,10 @@ static void print_place_status(const t_annealing_state& state,
 
     if (noc_enabled) {
         VTR_LOG(
-            " %7.2e %7.2e %8.2f",
-            noc_agg_bw, noc_agg_latency, noc_cong);
+            " %7.2e %7.2e"
+            " %8.2e %8.2f",
+            noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency,
+            noc_cost_terms.latency_overrun, noc_cost_terms.congestion);
     }
 
     VTR_LOG("\n");
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 2b9d38e9438..7358823d981 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -79,9 +79,7 @@ void t_placer_costs::update_norm_factors() {
  * @param noc_delta_cost NoC cost difference if the swap is accepted
  */
 t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
-    noc_aggregate_bandwidth_cost += noc_delta_cost.aggregate_bandwidth;
-    noc_latency_cost += noc_delta_cost.latency;
-    noc_congestion_cost += noc_delta_cost.congestion;
+    noc_cost_terms += noc_delta_cost;
 
     return *this;
 }
@@ -561,12 +559,24 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_
     return (mac_can_be_placed);
 }
 
-NocCostTerms::NocCostTerms(const t_placer_costs& costs)
-    : aggregate_bandwidth(costs.noc_aggregate_bandwidth_cost)
-    , latency(costs.noc_latency_cost)
-    , congestion(costs.noc_congestion_cost) {}
-
-NocCostTerms::NocCostTerms(double agg_bw, double lat, double congest)
+NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest)
     : aggregate_bandwidth(agg_bw)
     , latency(lat)
+    , latency_overrun(lat_overrun)
     , congestion(congest) {}
+
+NocCostTerms::NocCostTerms()
+    : aggregate_bandwidth(0)
+    , latency(0)
+    , latency_overrun(0)
+    , congestion(0) {}
+
+NocCostTerms& NocCostTerms::operator+=(const NocCostTerms& noc_delta_cost) {
+    aggregate_bandwidth += noc_delta_cost.aggregate_bandwidth;
+    latency += noc_delta_cost.latency;
+    latency_overrun += noc_delta_cost.latency_overrun;
+    congestion += noc_delta_cost.congestion;
+
+    return *this;
+}
+
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 2e1355f121d..22415b309d6 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -27,12 +27,15 @@ class t_placer_costs;
  */
 struct NocCostTerms {
   public:
-    NocCostTerms() = delete;
-    explicit NocCostTerms(const t_placer_costs& costs);
-    NocCostTerms(double agg_bw, double lat, double congest);
+    NocCostTerms();
+    NocCostTerms(const NocCostTerms&) = default;
+    NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest);
+    NocCostTerms& operator=(const NocCostTerms& other) = default;
+    NocCostTerms& operator+=(const NocCostTerms& noc_delta_cost);
 
     double aggregate_bandwidth = 0.0;
     double latency = 0.0;
+    double latency_overrun = 0.0;
     double congestion = 0.0;
 };
 
@@ -86,17 +89,14 @@ class t_placer_costs {
     double timing_cost = 0.;
     double bb_cost_norm = 0.;
     double timing_cost_norm = 0.;
-    double noc_aggregate_bandwidth_cost = 0.;
-    double noc_aggregate_bandwidth_cost_norm = 0.;
-    double noc_latency_cost = 0.;
-    double noc_latency_cost_norm = 0.;
-    double noc_congestion_cost = 0.;
-    double noc_congestion_cost_norm = 0.;
+
+    NocCostTerms noc_cost_terms;
+    NocCostTerms noc_cost_norm_factors;
 
   public: //Constructor
     t_placer_costs(t_place_algorithm algo)
         : place_algorithm(algo) {}
-    t_placer_costs() {}
+    t_placer_costs() = default;
 
   public: //Mutator
     void update_norm_factors();

From 477063419983ef30b01b10fd68086a8cd35a44c6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 Feb 2024 15:26:10 -0500
Subject: [PATCH 28/41] Update unit tests

---
 vpr/test/test_noc_place_utils.cpp | 266 +++++++++++++++++-------------
 1 file changed, 154 insertions(+), 112 deletions(-)

diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp
index 4df3d8e1e7c..8e53ec68ed9 100644
--- a/vpr/test/test_noc_place_utils.cpp
+++ b/vpr/test/test_noc_place_utils.cpp
@@ -406,13 +406,9 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
         //initialize all the cost calculator datastructures
         allocate_and_load_noc_placement_structs();
 
-        // create the noc options
-        t_noc_opts noc_opts;
-        noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
-        noc_opts.noc_latency_weighting = dist_3(double_engine);
-
-        // create local variable to store the latency cost
+        // create local variable to store the latency cost terms
         double golden_total_noc_latency_costs = 0.;
+        double golden_total_noc_latency_overrun_costs = 0.;
 
         // local router and link latency parameters
         double router_latency = noc_ctx.noc_model.get_noc_router_latency();
@@ -423,20 +419,23 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
             const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
 
             double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_route_sizes[traffic_flow_number] + 1)) + (link_latency * golden_traffic_flow_route_sizes[traffic_flow_number]);
+            double curr_traffic_flow_latency_overrun = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
 
             // calculate the latency cost
-            double current_latency_cost = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
-            current_latency_cost *= curr_traffic_flow.traffic_flow_priority;
+            double current_latency_cost = curr_traffic_flow_latency * curr_traffic_flow.traffic_flow_priority;
+            double current_latency_overrun_cost = curr_traffic_flow_latency_overrun * curr_traffic_flow.traffic_flow_priority;
 
             golden_total_noc_latency_costs += current_latency_cost;
+            golden_total_noc_latency_overrun_costs += current_latency_overrun_cost;
         }
 
         // run the test function and get the latency cost calculated
-        double found_latency_cost = comp_noc_latency_cost(noc_opts);
+        auto [found_latency_cost, found_latency_overrun_cost] = comp_noc_latency_cost();
 
         // compare the test function latency cost to the golden value
         // since we are comparing double numbers we allow a tolerance of difference
         REQUIRE(vtr::isclose(golden_total_noc_latency_costs, found_latency_cost));
+        REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_costs, found_latency_overrun_cost));
 
         // release the cost calculator datastructures
         free_noc_placement_structs();
@@ -446,21 +445,17 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") {
         //initialize all the cost calculator datastructures
         allocate_and_load_noc_placement_structs();
 
-        // create the noc options
-        t_noc_opts noc_opts;
-        noc_opts.noc_congestion_weighting = dist_3(double_engine);
-
         // create local variable to store the latency cost
         double golden_total_noc_congestion_costs = 0.;
 
         for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
             double congested_bw_ratio = link.get_congested_bandwidth_ratio();
 
-            golden_total_noc_congestion_costs += noc_opts.noc_congestion_weighting * congested_bw_ratio;
+            golden_total_noc_congestion_costs += congested_bw_ratio;
         }
 
         // run the test function to get the congestion cost
-        double found_congestion_cost = comp_noc_congestion_cost(noc_opts);
+        double found_congestion_cost = comp_noc_congestion_cost();
 
         // compare the test function congestion cost to the golden value
         // since we are comparing double numbers we allow a tolerance of difference
@@ -633,14 +628,16 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // store the traffic flow bandwidth costs and latency costs
     vtr::vector<NocTrafficFlowId, double> golden_traffic_flow_bandwidth_costs;
     vtr::vector<NocTrafficFlowId, double> golden_traffic_flow_latency_costs;
+    vtr::vector<NocTrafficFlowId, double> golden_traffic_flow_latency_overrun_costs;
     golden_traffic_flow_bandwidth_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
     golden_traffic_flow_latency_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
+    golden_traffic_flow_latency_overrun_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows());
     // store link congestion costs
     vtr::vector<NocLinkId, double> golden_link_congestion_costs;
     golden_link_congestion_costs.resize(noc_ctx.noc_model.get_number_of_noc_links());
 
     // stores the change in bandwidth and latency costs from the test function
-    NocCostTerms test_noc_costs{0.0, 0.0, 0.0};
+    NocCostTerms test_noc_costs;
 
     // we need to route all the traffic flows based on their initial positions
     for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) {
@@ -667,29 +664,33 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // also initialize the bandwidth and latency costs for all traffic flows
     // and sum them up to calculate the total initial aggregate bandwidth and latency costs for the NoC
     for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
-        for (auto& link : golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number]) {
+        for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
         }
 
         // calculate the bandwidth cost
-        golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number] = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() * curr_traffic_flow.traffic_flow_bandwidth;
-        golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_bandwidth_costs[traffic_flow_id] = golden_traffic_flow_routes[traffic_flow_id].size() * curr_traffic_flow.traffic_flow_bandwidth;
+        golden_traffic_flow_bandwidth_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority;
 
-        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() + 1)) + (link_latency * golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size());
+        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow_id].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow_id].size());
 
-        golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
-        golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_latency_costs[traffic_flow_id] = curr_traffic_flow_latency;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow_id] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
+        golden_traffic_flow_latency_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority;
 
-        test_noc_costs.aggregate_bandwidth += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number];
-        test_noc_costs.latency += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
+        test_noc_costs.aggregate_bandwidth += golden_traffic_flow_bandwidth_costs[traffic_flow_id];
+        test_noc_costs.latency += golden_traffic_flow_latency_costs[traffic_flow_id];
+        test_noc_costs.latency_overrun += golden_traffic_flow_latency_overrun_costs[traffic_flow_id];
     }
 
     // initialize golden congestion cost for all links
     for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
         auto link_id = link.get_link_id();
-        golden_link_congestion_costs[link_id] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link_id] - link_bandwidth, 0.0);
+        golden_link_congestion_costs[link_id] = std::max(golden_link_bandwidths[link_id] - link_bandwidth, 0.0);
         test_noc_costs.congestion += golden_link_congestion_costs[link_id];
     }
 
@@ -698,8 +699,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
     // We need to run these functions as they initialize local variables needed to run the test function within this unit test. we assume this is correct
     comp_noc_aggregate_bandwidth_cost();
-    comp_noc_latency_cost(noc_opts);
-    comp_noc_congestion_cost(noc_opts);
+    comp_noc_latency_cost();
+    comp_noc_congestion_cost();
 
     // datastructure that keeps track of moved blocks during placement
     t_pl_blocks_to_be_moved blocks_affected(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST);
@@ -767,7 +768,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and reduce the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
-                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+                    golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // re-route the traffic flow
@@ -776,7 +777,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and increase the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
-                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+                    golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // update the costs now
@@ -785,8 +786,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
                 double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size());
 
-                golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+                golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency;
+                golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
                 golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
+                golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
 
                 routed_traffic_flows.insert(traffic_flow);
             }
@@ -801,7 +804,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and reduce the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
-                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+                    golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // re-route the traffic flow
@@ -810,7 +813,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
                 // go through the current traffic flow and increase the bandwidths of the links
                 for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
                     golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
-                    golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+                    golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
                 }
 
                 // update the costs now
@@ -819,21 +822,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
                 double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size());
 
-                golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+                golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency;
+                golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
                 golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
+                golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
 
                 routed_traffic_flows.insert(traffic_flow);
             }
         }
 
-        NocCostTerms delta_cost {0.0, 0.0, 0.0};
+        NocCostTerms delta_cost;
 
         // call the test function
-        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
+        find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
 
-        // update the test total noc bandwidth, latency, and congestion costs based on the cost changes found by the test functions
+        // update the test noc cost terms based on the cost changes found by the test functions
         test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
         test_noc_costs.latency += delta_cost.latency;
+        test_noc_costs.latency_overrun += delta_cost.latency_overrun;
         test_noc_costs.congestion += delta_cost.congestion;
 
         // need this function to update the local datastructures that store all the traffic flow costs
@@ -909,7 +915,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -918,7 +924,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -927,8 +933,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
         double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size());
 
-        golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+        golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
         golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
     }
 
     // this is for the second swapped block
@@ -939,7 +947,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -948,7 +956,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -957,18 +965,21 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
         double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size());
 
-        golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+        golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
         golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
     }
 
-    NocCostTerms delta_cost {0.0, 0.0, 0.0};
+    NocCostTerms delta_cost;
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
 
-    // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
+    // update the test noc cost terms based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.latency_overrun += delta_cost.latency_overrun;
     test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
@@ -1032,7 +1043,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and reduce the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // re-route the traffic flow
@@ -1041,7 +1052,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         // go through the current traffic flow and increase the bandwidths of the links
         for (auto& link : golden_traffic_flow_routes[traffic_flow]) {
             golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth;
-            golden_link_congestion_costs[link] = noc_opts.noc_congestion_weighting * std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
+            golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0);
         }
 
         // update the costs now
@@ -1050,19 +1061,22 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
 
         double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size());
 
-        golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+        golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
         golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
+        golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority;
     }
 
     // reset the delta costs
-    delta_cost = NocCostTerms {0.0, 0.0, 0.0};
+    delta_cost = NocCostTerms();
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
 
-    // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
+    // update the test noc cost terms based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.latency_overrun += delta_cost.latency_overrun;
     test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
@@ -1119,14 +1133,15 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows //
 
     // reset the delta costs
-    delta_cost = NocCostTerms {0.0, 0.0, 0.0};
+    delta_cost = NocCostTerms();
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, noc_opts);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
 
-    // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions
+    // update the test noc cost terms based on the cost changes found by the test functions
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
     test_noc_costs.latency += delta_cost.latency;
+    test_noc_costs.latency_overrun += delta_cost.latency_overrun;
     test_noc_costs.congestion += delta_cost.congestion;
 
     // need this function to update the local datastructures that store all the traffic flow costs
@@ -1149,14 +1164,17 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
         REQUIRE(golden_link_congested_bandwidth_ratio == current_link.get_congested_bandwidth_ratio());
     }
 
-    // now find the total expected noc aggregate bandwidth, latency, and congestion cost
+    // now find the total expected noc cost terms
     double golden_total_noc_aggr_bandwidth_cost = 0.;
     double golden_total_noc_latency_cost = 0.;
+    double golden_total_noc_latency_overrun_cost = 0.;
     double golden_total_noc_congestion_cost = 0.;
 
     for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) {
-        golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number];
-        golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number];
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[traffic_flow_id];
+        golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[traffic_flow_id];
+        golden_total_noc_latency_overrun_cost += golden_traffic_flow_latency_overrun_costs[traffic_flow_id];
     }
 
     golden_total_noc_congestion_cost = std::accumulate(golden_link_congestion_costs.begin(), golden_link_congestion_costs.end(), 0.0);
@@ -1164,7 +1182,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference)
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
     REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
-    std::cout << golden_total_noc_congestion_cost << " " <<  test_noc_costs.congestion << std::endl;
+    std::cout << golden_total_noc_latency_overrun_cost << " " <<  test_noc_costs.latency_overrun << std::endl;
+    REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun));
     REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion));
 
     // now test the recompute cost function //
@@ -1173,6 +1192,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // start by resetting the test cost variables
     test_noc_costs.aggregate_bandwidth = 0.;
     test_noc_costs.latency = 0.;
+    test_noc_costs.latency_overrun = 0.;
     test_noc_costs.congestion = 0.;
 
     // now execute the test function
@@ -1181,6 +1201,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     // now verify
     REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth));
     REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency));
+    REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun));
     REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion));
 
     // delete local datastructures
@@ -1193,99 +1214,107 @@ TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") {
     t_placer_opts placer_opts;
 
     SECTION("Test case where the bandwidth cost is 0") {
-        costs.noc_aggregate_bandwidth_cost = 0.;
-        costs.noc_latency_cost = 1.;
-        costs.noc_congestion_cost = 1.;
+        costs.noc_cost_terms.aggregate_bandwidth = 0.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the aggregate bandwidth normalized cost
         // this should not be +INF and instead trimmed
-        REQUIRE(costs.noc_aggregate_bandwidth_cost_norm == 1.0);
+        REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.0);
     }
     SECTION("Test case where the latency cost is 0") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 0.;
-        costs.noc_congestion_cost = 1.;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 0.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the latency normalized cost
         // this should not be +INF and instead trimmed
-        REQUIRE(costs.noc_latency_cost_norm == 1.e12);
+        REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12);
     }
     SECTION("Test case where the bandwidth cost is an expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.e9;
-        costs.noc_latency_cost = 0.;
-        costs.noc_congestion_cost = 1.;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.e9;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the aggregate bandwidth normalized cost
         // this should not be trimmed
-        REQUIRE(costs.noc_aggregate_bandwidth_cost_norm == 1.e-9);
+        REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.e-9);
     }
     SECTION("Test case where the latency cost is an expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 50.e-12;
-        costs.noc_congestion_cost = 1.;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 50.e-12;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the latency normalized cost
         // this should not be trimmed
-        REQUIRE(costs.noc_latency_cost_norm == 2.e10);
+        REQUIRE(costs.noc_cost_norm_factors.latency == 2.e10);
     }
     SECTION("Test case where the latency cost is lower than the smallest expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 999.e-15;
-        costs.noc_congestion_cost = 1.;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 999.e-15;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the latency normalized cost
         // this should not be trimmed
-        REQUIRE(costs.noc_latency_cost_norm == 1.e12);
+        REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12);
     }
     SECTION("Test case where the congestion cost is zero") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 1.;
-        costs.noc_congestion_cost = 0.;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 0.;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the congestion normalization factor
         // this should not be infinite
-        REQUIRE(costs.noc_congestion_cost_norm == 1.e3);
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3);
     }
     SECTION("Test case where the congestion cost is lower than the smallest expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 1.;
-        costs.noc_congestion_cost = 999.e-15;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 999.e-15;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the congestion normalization factor
         // this should not be infinite
-        REQUIRE(costs.noc_congestion_cost_norm == 1.e3);
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3);
     }
     SECTION("Test case where the congestion cost is an expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 1.;
-        costs.noc_congestion_cost = 1.e2;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.e2;
 
         // run the test function
         update_noc_normalization_factors(costs);
 
         // verify the congestion normalization factor
-        REQUIRE(costs.noc_congestion_cost_norm == 1.e-2);
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e-2);
     }
 }
 TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
@@ -1585,7 +1614,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
             }
 
             // re-route the traffic flow
-            noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id], router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], golden_traffic_flow_routes[traffic_flow], noc_ctx.noc_model);
+            noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id],router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], golden_traffic_flow_routes[traffic_flow], noc_ctx.noc_model);
 
             // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function)
             // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks
@@ -1788,14 +1817,15 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
 
     // we need to route all the traffic flows based on their initial positions
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         // get the source and sink routers of this traffic flow
         int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id;
         int sink_hard_routed_id = (size_t)curr_traffic_flow.sink_router_cluster_id;
 
         // get the current traffic flow route
-        auto& traffic_flow_route = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number];
+        auto& traffic_flow_route = golden_traffic_flow_routes[traffic_flow_id];
         double traffic_flow_bandwidth = curr_traffic_flow.traffic_flow_bandwidth;
 
         // route it
@@ -1810,20 +1840,23 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         }
     }
 
-    // variables below store the expected noc costs (latency and bandwidth)
+    // variables below store the expected noc cost terms
     t_placer_costs costs;
-    costs.noc_aggregate_bandwidth_cost = 0.;
-    costs.noc_latency_cost = 0.;
-    costs.noc_congestion_cost = 0.;
+    costs.noc_cost_terms.aggregate_bandwidth = 0.;
+    costs.noc_cost_terms.latency = 0.;
+    costs.noc_cost_terms.latency_overrun = 0.;
+    costs.noc_cost_terms.congestion = 0.;
 
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
 
         double curr_bandwidth_cost = 0.;
         double curr_latency_cost = 0.;
+        double curr_latency_overrun_cost = 0.;
 
         // get the traffic flow route
-        const auto& golden_traffic_flow_route = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number];
+        const auto& golden_traffic_flow_route = golden_traffic_flow_routes[traffic_flow_id];
 
         // calculate the bandwidth cost
         curr_bandwidth_cost = golden_traffic_flow_route.size() * curr_traffic_flow.traffic_flow_bandwidth;
@@ -1832,17 +1865,20 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_route.size() + 1)) + (link_latency * golden_traffic_flow_route.size());
 
         // calculate the latency cost
-        curr_latency_cost = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+        curr_latency_cost = curr_traffic_flow_latency;
+        curr_latency_overrun_cost = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
         curr_latency_cost *= curr_traffic_flow.traffic_flow_priority;
+        curr_latency_overrun_cost *= curr_traffic_flow.traffic_flow_priority;
 
-        costs.noc_aggregate_bandwidth_cost += curr_bandwidth_cost;
-        costs.noc_latency_cost += curr_latency_cost;
+        costs.noc_cost_terms.aggregate_bandwidth += curr_bandwidth_cost;
+        costs.noc_cost_terms.latency += curr_latency_cost;
+        costs.noc_cost_terms.latency_overrun += curr_latency_overrun_cost;
     }
 
     // calculate the congestion cost
     for (const auto& noc_link : noc_ctx.noc_model.get_noc_links()) {
-        double curr_congestion_cost = noc_opts.noc_congestion_weighting * noc_link.get_congested_bandwidth_ratio();
-        costs.noc_congestion_cost += curr_congestion_cost;
+        double curr_congestion_cost = noc_link.get_congested_bandwidth_ratio();
+        costs.noc_cost_terms.congestion += curr_congestion_cost;
     }
 
     // this defines the error tolerance that is allowed between the golden noc costs and the costs found by the test function: check_noc_placement_costs
@@ -1858,19 +1894,25 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     }
     SECTION("Case where the check place fails for both NoC costs") {
         // we need to make the aggregate bandwidth cost and latency cost be a value that is larger or smaller than the tolerance value
-        costs.noc_aggregate_bandwidth_cost += (costs.noc_aggregate_bandwidth_cost * error_tolerance * 2);
-        costs.noc_latency_cost -= (costs.noc_latency_cost * error_tolerance * 2);
-        if (costs.noc_congestion_cost == 0) {
-            costs.noc_congestion_cost += MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2;
+        costs.noc_cost_terms.aggregate_bandwidth += (costs.noc_cost_terms.aggregate_bandwidth * error_tolerance * 2);
+        costs.noc_cost_terms.latency -= (costs.noc_cost_terms.latency * error_tolerance * 2);
+        if (costs.noc_cost_terms.latency_overrun == 0) {
+            costs.noc_cost_terms.latency_overrun += MIN_EXPECTED_NOC_LATENCY_COST * error_tolerance * 2;
+        } else {
+            costs.noc_cost_terms.latency_overrun += costs.noc_cost_terms.latency_overrun * error_tolerance * 2;
+        }
+
+        if (costs.noc_cost_terms.congestion == 0) {
+            costs.noc_cost_terms.congestion += MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2;
         } else {
-            costs.noc_congestion_cost += costs.noc_congestion_cost * error_tolerance * 2;
+            costs.noc_cost_terms.congestion += costs.noc_cost_terms.congestion * error_tolerance * 2;
         }
 
         // run the test function
         int error = check_noc_placement_costs(costs, error_tolerance, noc_opts);
 
-        // we expect error to be 3 here, meaning the found costs are not within the tolerance range
-        REQUIRE(error == 3);
+        // we expect error to be 4 here, meaning the found costs are not within the tolerance range
+        REQUIRE(error == 4);
     }
 }
 } // namespace

From b5a4c81612c889fdc8a1e70bfe4ad82a0c95704a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 Feb 2024 18:54:31 -0500
Subject: [PATCH 29/41] NoC cost weighting factors add up to 1

---
 vpr/src/base/vpr_types.h          |  1 +
 vpr/src/place/noc_place_utils.cpp | 15 +++++++++++++--
 vpr/src/place/noc_place_utils.h   |  2 ++
 vpr/src/place/place.cpp           |  4 ++++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index b57caa24999..a3802a89870 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1496,6 +1496,7 @@ struct t_noc_opts {
     std::string noc_flows_file;               ///<name of the file that contains all the traffic flow information to be sent over the NoC in this design
     std::string noc_routing_algorithm;        ///<controls the routing algorithm used to route packets within the NoC
     double noc_placement_weighting;           ///<controls the significance of the NoC placement cost relative to the total placement cost range:[0-inf)
+    double noc_aggregate_bandwidth_weighting; ///<controls the significance of the aggregate bandwidth cost relative to the other NoC placement costs; derived as 1 minus the sum of the latency, latency constraints, and congestion weightings
     double noc_latency_constraints_weighting; ///<controls the significance of meeting the traffic flow contraints range:[0-inf)
     double noc_latency_weighting;             ///<controls the significance of the traffic flow latencies relative to the other NoC placement costs range:[0-inf)
     double noc_congestion_weighting;          ///<controls the significance of the link congestions relative to the other NoC placement costs range:[0-inf)
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index a806b0b9ea1..0afc87e57f8 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -586,6 +586,17 @@ double calculate_link_congestion_cost(const NocLink& link) {
     return congested_bw_ratio;
 }
 
+void normalize_noc_cost_weighting_factor(t_noc_opts& noc_opts) {
+    // Derive the aggregate bandwidth weighting as the remainder, so that the four NoC cost weighting factors add up to 1.
+    double weighting_factor_sum = noc_opts.noc_latency_weighting +
+                                  noc_opts.noc_latency_constraints_weighting +
+                                  noc_opts.noc_congestion_weighting;
+
+    VTR_ASSERT(weighting_factor_sum <= 1.0 && weighting_factor_sum >= 0.0); // keeps the remainder computed below within [0, 1]
+
+    noc_opts.noc_aggregate_bandwidth_weighting = 1.0 - weighting_factor_sum;
+}
+
 double calculate_noc_cost(const NocCostTerms& cost_terms,
                           const NocCostTerms& norm_factors,
                           const t_noc_opts& noc_opts) {
@@ -597,8 +608,8 @@ double calculate_noc_cost(const NocCostTerms& cost_terms,
      * 3) Link congestion costs
      */
     cost = noc_opts.noc_placement_weighting * (
-               cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth +
-               cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_constraints_weighting +
+               cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth * noc_opts.noc_aggregate_bandwidth_weighting +
+               cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_weighting +
                cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting +
                cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting);
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index 58f041795dd..db041b59cb0 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -395,6 +395,8 @@ std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<
  */
 double calculate_link_congestion_cost(const NocLink& link);
 
+void normalize_noc_cost_weighting_factor(t_noc_opts& noc_opts);
+
 /**
  * @brief Computes a weighted average of NoC cost term to determine
  * NoC's contribution to the total placement cost.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8eed5b14518..b2e45fb3de0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -656,6 +656,10 @@ void try_place(const Netlist<>& net_list,
 
     vtr::ScopedStartFinishTimer timer("Placement");
 
+    if (noc_opts.noc) {
+        normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
+    }
+
     initial_placement(placer_opts,
                       placer_opts.constraints_file.c_str(),
                       noc_opts);

From 4b9d80486f2ef4492232148df4e2f77d8694086c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 Feb 2024 18:57:46 -0500
Subject: [PATCH 30/41] Update normalization factors during NoC initial
 placement

---
 vpr/src/place/initial_noc_placement.cpp | 75 ++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 27424e67d6a..f4e4d53d1d4 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -3,6 +3,7 @@
 #include "initial_placement.h"
 #include "noc_place_utils.h"
 #include "noc_place_checkpoint.h"
+#include "vtr_math.h"
 
 /**
  * @brief Evaluates whether a NoC router swap should be accepted or not.
@@ -32,7 +33,8 @@ static void place_constrained_noc_router(ClusterBlockId router_blk_id);
  *   NoC routers.
  *   @param seed Used for shuffling NoC routers.
  */
-static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_routers, int seed);
+static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_routers,
+                                       int seed);
 
 /**
  * @brief Runs a simulated annealing optimizer for NoC routers.
@@ -41,6 +43,16 @@ static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_rout
  */
 static void noc_routers_anneal(const t_noc_opts& noc_opts);
 
+/**
+ * @brief Check whether normalization factors need to be updated.
+ *
+ *   @param costs Most recent NoC cost terms.
+ *   @param old_costs NoC cost terms from the last time normalization
+ *   factors were updated.
+ */
+static bool is_renormalization_needed(const t_placer_costs& costs,
+                                      const t_placer_costs& old_costs);
+
 static bool accept_noc_swap(double delta_cost, double prob) {
     if (delta_cost <= 0.0) {
         return true;
@@ -58,6 +70,46 @@ static bool accept_noc_swap(double delta_cost, double prob) {
     }
 }
 
+static bool is_renormalization_needed(const t_placer_costs& costs,
+                                      const t_placer_costs& old_costs) {
+    constexpr double COST_DIFF_TOLERANCE = 0.1; // NOTE(review): passed as the 3rd argument of vtr::isclose, presumably the relative tolerance (10%), with the 4th argument (0.) presumably the absolute tolerance -- confirm against vtr_math.h
+    bool renormalization_needed = false;
+    // Each check below ORs its result in, so a single significantly changed cost term is enough to request renormalization.
+    // aggregate bandwidth has changed significantly
+    renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.aggregate_bandwidth,
+                                            old_costs.noc_cost_terms.aggregate_bandwidth,
+                                            COST_DIFF_TOLERANCE,
+                                            0.);
+
+    // latency cost has changed significantly
+    renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.latency,
+                                            old_costs.noc_cost_terms.latency,
+                                            COST_DIFF_TOLERANCE,
+                                            0.);
+
+    // if both old and new latency overrun costs are too small, ignore their difference
+    // Too small latency overrun costs are the result of round-off error
+    if (costs.noc_cost_terms.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST ||
+        old_costs.noc_cost_terms.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
+        renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.latency_overrun,
+                                                old_costs.noc_cost_terms.latency_overrun,
+                                                COST_DIFF_TOLERANCE,
+                                                0.);
+    }
+
+    // if both old and new congestion costs are too small, ignore their difference
+    // Too small congestion costs are the result of round-off error
+    if (costs.noc_cost_terms.congestion > MIN_EXPECTED_NOC_CONGESTION_COST ||
+        old_costs.noc_cost_terms.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
+        renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.congestion,
+                                                old_costs.noc_cost_terms.congestion,
+                                                COST_DIFF_TOLERANCE,
+                                                0.);
+    }
+
+    return renormalization_needed;
+}
+
 static void place_constrained_noc_router(ClusterBlockId router_blk_id) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();
@@ -156,6 +208,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
 
     // Only NoC related costs are considered
     t_placer_costs costs;
+    // NoC costs from the last time normalization factors were updated
+    t_placer_costs old_costs;
 
     // Initialize NoC-related costs
     costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
@@ -163,6 +217,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
     update_noc_normalization_factors(costs);
     costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
+    old_costs = costs;
+
 
     // Maximum distance in each direction that a router can travel in a move
     // It is assumed that NoC routers are organized in a square grid.
@@ -182,6 +238,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     const int N_MOVES_PER_ROUTER = 35000;
     const int N_MOVES = num_router_clusters * N_MOVES_PER_ROUTER;
 
+    const int RENORMALIZATION_LIM = 1024;
+    int renormalization_cnt = 0;
+
     const double starting_prob = 0.5;
     const double prob_step = starting_prob / N_MOVES;
 
@@ -201,6 +260,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
      * Range limit and the probability of accepting swaps with positive delta cost
      * decrease linearly as more swaps are evaluated. Late in the annealing,
      * NoC routers are swapped only with their neighbors as the range limit approaches 1.
+     *
+     * After every RENORMALIZATION_LIM accepted moves, if the NoC cost terms have changed
+     * significantly, the normalization factors are updated and the total cost is re-computed.
      */
 
     // Generate and evaluate router moves
@@ -230,6 +292,17 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
+
+                renormalization_cnt++;
+                if (renormalization_cnt == RENORMALIZATION_LIM) {
+                    renormalization_cnt = 0;
+                    if (is_renormalization_needed(costs, old_costs)) {
+                        update_noc_normalization_factors(costs);
+                        costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
+                        old_costs = costs;
+                    }
+                }
+
             } else { // The proposed move is rejected
                 revert_move_blocks(blocks_affected);
                 revert_noc_traffic_flow_routes(blocks_affected);

From 37426cb82ac3fa127bf6f5b15361354d059c4521 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 Feb 2024 20:36:48 -0500
Subject: [PATCH 31/41] parse new noc metrics

---
 vpr/src/place/place.cpp                 | 53 +++++++++++++++----------
 vtr_flow/parse/parse_config/vpr_noc.txt | 10 +++--
 2 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b2e45fb3de0..93287555b5f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -828,17 +828,19 @@ void try_place(const Netlist<>& net_list,
             costs.bb_cost, costs.timing_cost);
     if (noc_opts.noc) {
         VTR_LOG("NoC Placement Costs. "
-            "noc cost: %g, "
-            "noc_aggregate_bandwidth_cost: %g, "
-            "noc_latency_cost: %g, "
-            "noc_latency_constraints_cost: %d, "
-            "noc_congestion_cost: %g, "
+            "cost: %g, "
+            "aggregate_bandwidth_cost: %g, "
+            "latency_cost: %g, "
+            "n_met_latency_constraints: %d, "
+            "latency_overrun_cost: %g, "
+            "congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
             calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
             costs.noc_cost_terms.aggregate_bandwidth,
             costs.noc_cost_terms.latency,
             get_number_of_traffic_flows_with_latency_cons_met(),
+            costs.noc_cost_terms.latency_overrun,
             costs.noc_cost_terms.congestion,
             get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
@@ -876,20 +878,25 @@ void try_place(const Netlist<>& net_list,
     if (noc_opts.noc) {
         sprintf(msg,
                 "\nInitial NoC Placement Costs. "
-                "noc cost: %g, "
-                "noc_aggregate_bandwidth_cost: %g "
-                "noc_latency_cost: %g "
-                "noc_latency_constraints_cost: %d "
-                "noc_congestion_cost: %g "
+                "cost: %g, "
+                "aggregate_bandwidth_cost: %g, "
+                "latency_cost: %g, "
+                "n_met_latency_constraints: %d, "
+                "latency_overrun_cost: %g, "
+                "congestion_cost: %g, "
                 "accum_congested_ratio: %g, "
-                "n_congested_links: %d",
+                "n_congested_links: %d \n",
                 calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
                 costs.noc_cost_terms.aggregate_bandwidth,
                 costs.noc_cost_terms.latency,
                 get_number_of_traffic_flows_with_latency_cons_met(),
+                costs.noc_cost_terms.latency_overrun,
                 costs.noc_cost_terms.congestion,
                 get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
+
+
+
     }
     //Draw the initial placement
     update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info);
@@ -1201,33 +1208,37 @@ void try_place(const Netlist<>& net_list,
     if (noc_opts.noc) {
         sprintf(msg,
                 "\nNoC Placement Costs. "
-                "noc cost: %g, "
-                "noc_aggregate_bandwidth_cost: %g, "
-                "noc_latency_cost: %g, "
-                "noc_latency_constraints_cost: %d, "
-                "noc_congestion_cost: %g, "
+                "cost: %g, "
+                "aggregate_bandwidth_cost: %g, "
+                "latency_cost: %g, "
+                "n_met_latency_constraints: %d, "
+                "latency_overrun_cost: %g, "
+                "congestion_cost: %g, "
                 "accum_congested_ratio: %g, "
                 "n_congested_links: %d \n",
                 calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
                 costs.noc_cost_terms.aggregate_bandwidth,
                 costs.noc_cost_terms.latency,
                 get_number_of_traffic_flows_with_latency_cons_met(),
+                costs.noc_cost_terms.latency_overrun,
                 costs.noc_cost_terms.congestion,
                 get_total_congestion_bandwidth_ratio(),
                 get_number_of_congested_noc_links());
 
         VTR_LOG("\nNoC Placement Costs. "
-            "noc cost: %g, "
-            "noc_aggregate_bandwidth_cost: %g, "
-            "noc_latency_cost: %g, "
-            "noc_latency_constraints_cost: %d, "
-            "noc_congestion_cost: %g, "
+            "cost: %g, "
+            "aggregate_bandwidth_cost: %g, "
+            "latency_cost: %g, "
+            "n_met_latency_constraints: %d, "
+            "latency_overrun_cost: %g, "
+            "congestion_cost: %g, "
             "accum_congested_ratio: %g, "
             "n_congested_links: %d \n",
             calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts),
             costs.noc_cost_terms.aggregate_bandwidth,
             costs.noc_cost_terms.latency,
             get_number_of_traffic_flows_with_latency_cons_met(),
+            costs.noc_cost_terms.latency_overrun,
             costs.noc_cost_terms.congestion,
             get_total_congestion_bandwidth_ratio(),
             get_number_of_congested_noc_links());
diff --git a/vtr_flow/parse/parse_config/vpr_noc.txt b/vtr_flow/parse/parse_config/vpr_noc.txt
index f9a9a4440ac..51b7c194712 100644
--- a/vtr_flow/parse/parse_config/vpr_noc.txt
+++ b/vtr_flow/parse/parse_config/vpr_noc.txt
@@ -11,6 +11,10 @@
 %include "timing/vpr.route_min_chan_width.txt"
 %include "timing/vpr.route_relaxed_chan_width.txt"
 
-NoC_agg_bandwidth;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: (.*), noc_latency_cost: .*, noc_latency_constraints_cost: .*,
-NoC_latency;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: .*, noc_latency_cost: (.*), noc_latency_constraints_cost: .*,
-NoC_latency_constraints_cost;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: .*, noc_latency_cost: .*, noc_latency_constraints_cost: (.*),
\ No newline at end of file
+NoC_agg_bandwidth;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: (.*), latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_latency;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: (.*), n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_n_met_latency_constraints;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: (.*), latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_latency_overrun;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: (.*), congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_congested_bw;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: (.*), accum_congested_ratio: .*, n_congested_links: .*
+NoC_congestion_ratio;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: (.*), n_congested_links: .*
+NoC_n_congested_links;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: (.*)

From 2d3e642f42738987c219b5b14f3f23fdcd435b5a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 2 Feb 2024 15:34:13 -0500
Subject: [PATCH 32/41] Add include_temp to vtr task syntax

When I pass rr graph and router lookahead files to VPR, it throws an error. capnproto uses mmap to open these files. It seems that multiple processes can access a single file using mmap. However, I cannot trust capnproto. The changes in this commit enhance the vtr task syntax by allowing arbitrary files to be copied to the temporary directory. This way, I can copy the rr graph file and prevent multiple processes from accessing the same file.
---
 vtr_flow/scripts/python_libs/vtr/flow.py | 24 ++++++++++++-
 vtr_flow/scripts/python_libs/vtr/task.py | 45 ++++++++++++++++++++++--
 vtr_flow/scripts/run_vtr_flow.py         |  9 +++++
 3 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/vtr_flow/scripts/python_libs/vtr/flow.py b/vtr_flow/scripts/python_libs/vtr/flow.py
index a1f14e2816e..d7ab0498fdc 100644
--- a/vtr_flow/scripts/python_libs/vtr/flow.py
+++ b/vtr_flow/scripts/python_libs/vtr/flow.py
@@ -1,6 +1,7 @@
 """
     Module to run the VTR flow. This module calls other modules that then access the tools like VPR.
 """
+import os
 import shutil
 from pathlib import Path
 from collections import OrderedDict
@@ -36,6 +37,7 @@ def run(
     circuit_file,
     power_tech_file=None,
     include_files=None,
+    include_temp_files=None,
     start_stage=VtrStage.PARMYS,
     end_stage=VtrStage.VPR,
     command_runner=vtr.CommandRunner(),
@@ -176,7 +178,7 @@ def run(
     shutil.copy(str(circuit_file), str(circuit_copy))
     shutil.copy(str(architecture_file), str(architecture_copy))
 
-    # Check whether any inclulde is specified
+    # Check whether any include is specified
     if include_files:
         # Verify include files are Paths or convert them to Path + check that they exist
         # Copy include files to the run directory
@@ -185,6 +187,17 @@ def run(
             include_copy = temp_dir / include_file.name
             shutil.copy(str(include), str(include_copy))
 
+
+    # Check whether any include is specified
+    if include_temp_files:
+        # Verify include files are Paths or convert them to Path + check that they exist
+        # Copy temp include files to the run directory
+        for include_temp in include_temp_files:
+            include_temp_file = vtr.util.verify_file(include_temp, "Temporary Include")
+            include_temp_copy = temp_dir / include_temp_file.name
+            shutil.copy(str(include_temp), str(include_temp_copy))
+
+
     # There are multiple potential paths for the netlist to reach a tool
     # We initialize it here to the user specified circuit and let downstream
     # stages update it
@@ -384,6 +397,15 @@ def run(
             power_tech_file,
         )
 
+    # Check whether any temporary include is specified
+    if include_temp_files:
+        # Verify temp include files are Paths or convert them to Path + check that they exist
+        # Then find
+        for include_temp in include_temp_files:
+            include_temp_file = vtr.util.verify_file(include_temp, "Temporary Include")
+            include_temp_copy = temp_dir / include_temp_file.name
+            os.remove(str(include_temp_copy))
+
 
 # pylint: enable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements
 
diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py
index 8959d836ea9..ae06a27e5ad 100644
--- a/vtr_flow/scripts/python_libs/vtr/task.py
+++ b/vtr_flow/scripts/python_libs/vtr/task.py
@@ -39,6 +39,8 @@ def __init__(
         parse_file,
         includes_dir=None,
         include_list_add=None,
+        include_temp_dir=None,
+        include_temp_list_add=None,
         second_parse_file=None,
         script_path=None,
         script_params=None,
@@ -64,6 +66,8 @@ def __init__(
         self.archs = arch_list_add
         self.include_dir = includes_dir
         self.includes = include_list_add
+        self.include_temp_dir = include_temp_dir
+        self.include_temps = include_temp_list_add
         self.parse_file = parse_file
         self.second_parse_file = second_parse_file
         self.script_path = script_path
@@ -97,6 +101,7 @@ def __init__(
         arch,
         circuit,
         include,
+        include_temp,
         script_params,
         work_dir,
         run_command,
@@ -108,6 +113,7 @@ def __init__(
         self._arch = arch
         self._circuit = circuit
         self._include = include
+        self._include_temp = include_temp
         self._script_params = script_params
         self._run_command = run_command
         self._parse_command = parse_command
@@ -139,6 +145,12 @@ def include(self):
         """
         return self._include
 
+    def include_temp(self):
+        """
+        return the list of temporary include files of the job.
+        """
+        return self._include_temp
+
     def script_params(self):
         """
         return the script parameter of the job
@@ -197,6 +209,7 @@ def load_task_config(config_file) -> TaskConfig:
         [
             "circuits_dir",
             "includes_dir",
+            "include_temp_dir",
             "archs_dir",
             "additional_files",
             "parse_file",
@@ -264,6 +277,7 @@ def load_task_config(config_file) -> TaskConfig:
 
     check_required_fields(config_file, required_keys, key_values)
     check_include_fields(config_file, key_values)
+    check_include_temp_fields(config_file, key_values)
 
     # Useful meta-data about the config
     config_dir = str(Path(config_file).parent)
@@ -300,6 +314,18 @@ def check_include_fields(config_file, key_values):
                 )
             )
 
+def check_include_temp_fields(config_file, key_values):
+    """
+    Check that include_temp_dir was specified if some files to temporarily include
+    in the designs (include_temp_list_add) was specified.
+    """
+    if "include_temp_list_add" in key_values:
+        if "include_temp_dir" not in key_values:
+            raise VtrError(
+                "Missing required key '{key}' in config file {file}".format(
+                    key="include_temp_dir", file=config_file
+                )
+            )
 
 def shorten_task_names(configs, common_task_prefix):
     """
@@ -377,6 +403,17 @@ def create_cmd(
 
         cmd += includes
 
+    # Resolve and collect all include_temp paths in the config file
+    # as -include_temp ["include_temp1", "include_temp2", ..]
+    include_temps = []
+    if config.include_temps:
+        cmd += ["-include_temp"]
+        for include_temp in config.include_temps:
+            abs_include_filepath = resolve_vtr_source_file(config, include_temp, config.include_temp_dir)
+            include_temps.append(abs_include_filepath)
+
+        cmd += include_temps
+
     # Check if additional architectural data files are present
     if config.additional_files_list_add:
         for additional_file in config.additional_files_list_add:
@@ -466,7 +503,7 @@ def create_cmd(
             resolve_vtr_source_file(config, noc_traffic, config.noc_traffic_dir),
         ]
 
-    return includes, parse_cmd, second_parse_cmd, qor_parse_command, cmd
+    return includes, include_temps, parse_cmd, second_parse_cmd, qor_parse_command, cmd
 
 
 # pylint: disable=too-many-branches
@@ -509,7 +546,7 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                 )
             )
 
-            includes, parse_cmd, second_parse_cmd, qor_parse_command, cmd = create_cmd(
+            includes, include_temps, parse_cmd, second_parse_cmd, qor_parse_command, cmd = create_cmd(
                 abs_circuit_filepath, abs_arch_filepath, config, args, circuit, noc_traffic
             )
 
@@ -521,6 +558,7 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                             config,
                             circuit,
                             includes,
+                            include_temps,
                             arch,
                             noc_traffic,
                             value,
@@ -540,6 +578,7 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                         config,
                         circuit,
                         includes,
+                        include_temps,
                         arch,
                         noc_traffic,
                         None,
@@ -561,6 +600,7 @@ def create_job(
     config,
     circuit,
     include,
+    include_temp,
     arch,
     noc_flow,
     param,
@@ -658,6 +698,7 @@ def create_job(
         arch,
         circuit,
         include,
+        include_temp,
         param_string,
         work_dir + "/" + param_string,
         current_cmd,
diff --git a/vtr_flow/scripts/run_vtr_flow.py b/vtr_flow/scripts/run_vtr_flow.py
index 3d584274e3f..00973a7e62e 100755
--- a/vtr_flow/scripts/run_vtr_flow.py
+++ b/vtr_flow/scripts/run_vtr_flow.py
@@ -155,6 +155,14 @@ def vtr_command_argparser(prog=None):
         help="List of include files to a benchmark circuit (pass to VTR"
         + " frontends as a benchmark design set)",
     )
+    parser.add_argument(
+        "-include_temp",
+        nargs="*",
+        default=None,
+        dest="include_temp_list_file",
+        help="List of include files to be copied to the working directory"
+             + " and be removed when the task is done)",
+    )
 
     #
     # Power arguments
@@ -561,6 +569,7 @@ def vtr_command_main(arg_list, prog=None):
             Path(args.circuit_file),
             power_tech_file=args.power_tech,
             include_files=args.include_list_file,
+            include_temp_files=args.include_temp_list_file,
             temp_dir=temp_dir,
             start_stage=args.start,
             end_stage=args.end,

From a440aa502b808a8a099d9204c5cbe484b93a1caa Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 2 Feb 2024 15:36:03 -0500
Subject: [PATCH 33/41] update PWD environment variable before spawning a
 subprocess

The previous commits did not work. It seems that capnproto uses the PWD environment variable instead of calling getcwd(). The subprocess.Popen call changes the working directory of the child process, but does not update PWD. I update it manually.
---
 vtr_flow/scripts/python_libs/vtr/util.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py
index 14d7e519ede..a1b8c598405 100644
--- a/vtr_flow/scripts/python_libs/vtr/util.py
+++ b/vtr_flow/scripts/python_libs/vtr/util.py
@@ -8,6 +8,7 @@
 import subprocess
 import argparse
 import csv
+import os
 
 from collections import OrderedDict
 from pathlib import PurePath
@@ -145,12 +146,15 @@ def run_system_command(
         try:
             # Call the command
             stderr = None if self._valgrind else subprocess.STDOUT
+            modified_environ = os.environ.copy()
+            modified_environ['PWD'] = str(temp_dir)
             proc = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,  # We grab stdout
                 stderr=stderr,  # stderr redirected to stderr
                 universal_newlines=True,  # Lines always end in \n
                 cwd=str(temp_dir),  # Where to run the command
+                env=modified_environ
             )
 
             # Read the output line-by-line and log it

From a0de5f17add5f56eb5d98f52c182992844fe3d5d Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 2 Feb 2024 17:05:11 -0500
Subject: [PATCH 34/41] Revert "Add include_temp to vtr task syntax"

This reverts commit 2d3e642f42738987c219b5b14f3f23fdcd435b5a.
---
 vtr_flow/scripts/python_libs/vtr/flow.py | 24 +------------
 vtr_flow/scripts/python_libs/vtr/task.py | 45 ++----------------------
 vtr_flow/scripts/run_vtr_flow.py         |  9 -----
 3 files changed, 3 insertions(+), 75 deletions(-)

diff --git a/vtr_flow/scripts/python_libs/vtr/flow.py b/vtr_flow/scripts/python_libs/vtr/flow.py
index d7ab0498fdc..a1f14e2816e 100644
--- a/vtr_flow/scripts/python_libs/vtr/flow.py
+++ b/vtr_flow/scripts/python_libs/vtr/flow.py
@@ -1,7 +1,6 @@
 """
     Module to run the VTR flow. This module calls other modules that then access the tools like VPR.
 """
-import os
 import shutil
 from pathlib import Path
 from collections import OrderedDict
@@ -37,7 +36,6 @@ def run(
     circuit_file,
     power_tech_file=None,
     include_files=None,
-    include_temp_files=None,
     start_stage=VtrStage.PARMYS,
     end_stage=VtrStage.VPR,
     command_runner=vtr.CommandRunner(),
@@ -178,7 +176,7 @@ def run(
     shutil.copy(str(circuit_file), str(circuit_copy))
     shutil.copy(str(architecture_file), str(architecture_copy))
 
-    # Check whether any include is specified
+    # Check whether any inclulde is specified
     if include_files:
         # Verify include files are Paths or convert them to Path + check that they exist
         # Copy include files to the run directory
@@ -187,17 +185,6 @@ def run(
             include_copy = temp_dir / include_file.name
             shutil.copy(str(include), str(include_copy))
 
-
-    # Check whether any include is specified
-    if include_temp_files:
-        # Verify include files are Paths or convert them to Path + check that they exist
-        # Copy temp include files to the run directory
-        for include_temp in include_temp_files:
-            include_temp_file = vtr.util.verify_file(include_temp, "Temporary Include")
-            include_temp_copy = temp_dir / include_temp_file.name
-            shutil.copy(str(include_temp), str(include_temp_copy))
-
-
     # There are multiple potential paths for the netlist to reach a tool
     # We initialize it here to the user specified circuit and let downstream
     # stages update it
@@ -397,15 +384,6 @@ def run(
             power_tech_file,
         )
 
-    # Check whether any temporary include is specified
-    if include_temp_files:
-        # Verify temp include files are Paths or convert them to Path + check that they exist
-        # Then find
-        for include_temp in include_temp_files:
-            include_temp_file = vtr.util.verify_file(include_temp, "Temporary Include")
-            include_temp_copy = temp_dir / include_temp_file.name
-            os.remove(str(include_temp_copy))
-
 
 # pylint: enable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements
 
diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py
index ae06a27e5ad..8959d836ea9 100644
--- a/vtr_flow/scripts/python_libs/vtr/task.py
+++ b/vtr_flow/scripts/python_libs/vtr/task.py
@@ -39,8 +39,6 @@ def __init__(
         parse_file,
         includes_dir=None,
         include_list_add=None,
-        include_temp_dir=None,
-        include_temp_list_add=None,
         second_parse_file=None,
         script_path=None,
         script_params=None,
@@ -66,8 +64,6 @@ def __init__(
         self.archs = arch_list_add
         self.include_dir = includes_dir
         self.includes = include_list_add
-        self.include_temp_dir = include_temp_dir
-        self.include_temps = include_temp_list_add
         self.parse_file = parse_file
         self.second_parse_file = second_parse_file
         self.script_path = script_path
@@ -101,7 +97,6 @@ def __init__(
         arch,
         circuit,
         include,
-        include_temp,
         script_params,
         work_dir,
         run_command,
@@ -113,7 +108,6 @@ def __init__(
         self._arch = arch
         self._circuit = circuit
         self._include = include
-        self._include_temp = include_temp
         self._script_params = script_params
         self._run_command = run_command
         self._parse_command = parse_command
@@ -145,12 +139,6 @@ def include(self):
         """
         return self._include
 
-    def include_temp(self):
-        """
-        return the list of temporary include files of the job.
-        """
-        return self._include_temp
-
     def script_params(self):
         """
         return the script parameter of the job
@@ -209,7 +197,6 @@ def load_task_config(config_file) -> TaskConfig:
         [
             "circuits_dir",
             "includes_dir",
-            "include_temp_dir",
             "archs_dir",
             "additional_files",
             "parse_file",
@@ -277,7 +264,6 @@ def load_task_config(config_file) -> TaskConfig:
 
     check_required_fields(config_file, required_keys, key_values)
     check_include_fields(config_file, key_values)
-    check_include_temp_fields(config_file, key_values)
 
     # Useful meta-data about the config
     config_dir = str(Path(config_file).parent)
@@ -314,18 +300,6 @@ def check_include_fields(config_file, key_values):
                 )
             )
 
-def check_include_temp_fields(config_file, key_values):
-    """
-    Check that include_temp_dir was specified if some files to temporarily include
-    in the designs (include_temp_list_add) was specified.
-    """
-    if "include_temp_list_add" in key_values:
-        if "include_temp_dir" not in key_values:
-            raise VtrError(
-                "Missing required key '{key}' in config file {file}".format(
-                    key="include_temp_dir", file=config_file
-                )
-            )
 
 def shorten_task_names(configs, common_task_prefix):
     """
@@ -403,17 +377,6 @@ def create_cmd(
 
         cmd += includes
 
-    # Resolve and collect all include_temp paths in the config file
-    # as -include_temp ["include_temp1", "include_temp2", ..]
-    include_temps = []
-    if config.include_temps:
-        cmd += ["-include_temp"]
-        for include_temp in config.include_temps:
-            abs_include_filepath = resolve_vtr_source_file(config, include_temp, config.include_temp_dir)
-            include_temps.append(abs_include_filepath)
-
-        cmd += include_temps
-
     # Check if additional architectural data files are present
     if config.additional_files_list_add:
         for additional_file in config.additional_files_list_add:
@@ -503,7 +466,7 @@ def create_cmd(
             resolve_vtr_source_file(config, noc_traffic, config.noc_traffic_dir),
         ]
 
-    return includes, include_temps, parse_cmd, second_parse_cmd, qor_parse_command, cmd
+    return includes, parse_cmd, second_parse_cmd, qor_parse_command, cmd
 
 
 # pylint: disable=too-many-branches
@@ -546,7 +509,7 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                 )
             )
 
-            includes, include_temps, parse_cmd, second_parse_cmd, qor_parse_command, cmd = create_cmd(
+            includes, parse_cmd, second_parse_cmd, qor_parse_command, cmd = create_cmd(
                 abs_circuit_filepath, abs_arch_filepath, config, args, circuit, noc_traffic
             )
 
@@ -558,7 +521,6 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                             config,
                             circuit,
                             includes,
-                            include_temps,
                             arch,
                             noc_traffic,
                             value,
@@ -578,7 +540,6 @@ def create_jobs(args, configs, after_run=False) -> List[Job]:
                         config,
                         circuit,
                         includes,
-                        include_temps,
                         arch,
                         noc_traffic,
                         None,
@@ -600,7 +561,6 @@ def create_job(
     config,
     circuit,
     include,
-    include_temp,
     arch,
     noc_flow,
     param,
@@ -698,7 +658,6 @@ def create_job(
         arch,
         circuit,
         include,
-        include_temp,
         param_string,
         work_dir + "/" + param_string,
         current_cmd,
diff --git a/vtr_flow/scripts/run_vtr_flow.py b/vtr_flow/scripts/run_vtr_flow.py
index 00973a7e62e..3d584274e3f 100755
--- a/vtr_flow/scripts/run_vtr_flow.py
+++ b/vtr_flow/scripts/run_vtr_flow.py
@@ -155,14 +155,6 @@ def vtr_command_argparser(prog=None):
         help="List of include files to a benchmark circuit (pass to VTR"
         + " frontends as a benchmark design set)",
     )
-    parser.add_argument(
-        "-include_temp",
-        nargs="*",
-        default=None,
-        dest="include_temp_list_file",
-        help="List of include files to be copied to the working directory"
-             + " and be removed when the task is done)",
-    )
 
     #
     # Power arguments
@@ -569,7 +561,6 @@ def vtr_command_main(arg_list, prog=None):
             Path(args.circuit_file),
             power_tech_file=args.power_tech,
             include_files=args.include_list_file,
-            include_temp_files=args.include_temp_list_file,
             temp_dir=temp_dir,
             start_stage=args.start,
             end_stage=args.end,

From c33a6a8d14398ea6d3efdd2c9d1b871b3ca53919 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 6 Feb 2024 19:24:58 -0500
Subject: [PATCH 35/41] revert renormalization during init noc placement

---
 vpr/src/place/initial_noc_placement.cpp | 31 -------------------------
 1 file changed, 31 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index f4e4d53d1d4..75e6e6cb49e 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -43,16 +43,6 @@ static void place_noc_routers_randomly(std::vector<ClusterBlockId>& unfixed_rout
  */
 static void noc_routers_anneal(const t_noc_opts& noc_opts);
 
-/**
- * @brief Check whether normalization factors need to be updated.
- *
- *   @param costs Most recent NoC cost terms.
- *   @param old_costs NoC cost terms from the last time normalization
- *   factors were updated.
- */
-static bool is_renormalization_needed(const t_placer_costs& costs,
-                                      const t_placer_costs& old_costs);
-
 static bool accept_noc_swap(double delta_cost, double prob) {
     if (delta_cost <= 0.0) {
         return true;
@@ -208,8 +198,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
 
     // Only NoC related costs are considered
     t_placer_costs costs;
-    // NoC costs from the last time normalization factors were updated
-    t_placer_costs old_costs;
 
     // Initialize NoC-related costs
     costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
@@ -217,8 +205,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
     update_noc_normalization_factors(costs);
     costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
-    old_costs = costs;
-
 
     // Maximum distance in each direction that a router can travel in a move
     // It is assumed that NoC routers are organized in a square grid.
@@ -238,9 +224,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     const int N_MOVES_PER_ROUTER = 35000;
     const int N_MOVES = num_router_clusters * N_MOVES_PER_ROUTER;
 
-    const int RENORMALIZATION_LIM = 1024;
-    int renormalization_cnt = 0;
-
     const double starting_prob = 0.5;
     const double prob_step = starting_prob / N_MOVES;
 
@@ -260,9 +243,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
      * Range limit and the probability of accepting swaps with positive delta cost
      * decrease linearly as more swaps are evaluated. Late in the annealing,
      * NoC routers are swapped only with their neighbors as the range limit approaches 1.
-     *
-     * After each RENORMALIZATION_LIM accepted moves, if NoC cost terms have changed
-     * significantly, I update the normalization factors and re-compute the total cost.
      */
 
     // Generate and evaluate router moves
@@ -292,17 +272,6 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
                 if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
                     checkpoint.save_checkpoint(costs.cost);
                 }
-
-                renormalization_cnt++;
-                if (renormalization_cnt == RENORMALIZATION_LIM) {
-                    renormalization_cnt = 0;
-                    if (is_renormalization_needed(costs, old_costs)) {
-                        update_noc_normalization_factors(costs);
-                        costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
-                        old_costs = costs;
-                    }
-                }
-
             } else { // The proposed move is rejected
                 revert_move_blocks(blocks_affected);
                 revert_noc_traffic_flow_routes(blocks_affected);

From 2f078fea8fecc9c3f359950a87554cb3ca7fc6fe Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 7 Feb 2024 10:54:02 -0500
Subject: [PATCH 36/41] remove unused functions

---
 vpr/src/place/initial_noc_placement.cpp | 40 -------------------------
 1 file changed, 40 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index 75e6e6cb49e..f3298062886 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -60,46 +60,6 @@ static bool accept_noc_swap(double delta_cost, double prob) {
     }
 }
 
-static bool is_renormalization_needed(const t_placer_costs& costs,
-                                      const t_placer_costs& old_costs) {
-    constexpr double COST_DIFF_TOLERANCE = 0.1;
-    bool renormalization_needed = false;
-
-    // aggregate bandwidth has changed significantly
-    renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.aggregate_bandwidth,
-                                            old_costs.noc_cost_terms.aggregate_bandwidth,
-                                            COST_DIFF_TOLERANCE,
-                                            0.);
-
-    // latency cost has changed significantly
-    renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.latency,
-                                            old_costs.noc_cost_terms.latency,
-                                            COST_DIFF_TOLERANCE,
-                                            0.);
-
-    // if both old and new latency overrun costs are too small, ignore their difference
-    // Too small latency overrun costs are the result of round-off error
-    if (costs.noc_cost_terms.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST ||
-        old_costs.noc_cost_terms.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
-        renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.latency_overrun,
-                                                old_costs.noc_cost_terms.latency_overrun,
-                                                COST_DIFF_TOLERANCE,
-                                                0.);
-    }
-
-    // if both old and new congestion costs are too small, ignore their difference
-    // Too small congestion costs are the result of round-off error
-    if (costs.noc_cost_terms.congestion > MIN_EXPECTED_NOC_CONGESTION_COST ||
-        old_costs.noc_cost_terms.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
-        renormalization_needed |= !vtr::isclose(costs.noc_cost_terms.congestion,
-                                                old_costs.noc_cost_terms.congestion,
-                                                COST_DIFF_TOLERANCE,
-                                                0.);
-    }
-
-    return renormalization_needed;
-}
-
 static void place_constrained_noc_router(ClusterBlockId router_blk_id) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();

From bc3557ca9c4806b711e94bb2fb662cb845689b13 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 7 Feb 2024 11:18:33 -0500
Subject: [PATCH 37/41] updated default NoC placement weighting factors

---
 vpr/src/base/read_options.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 24aacf354ec..fe389a66a31 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2802,21 +2802,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .help(
             "Controls the importance of the NoC placement parameters relative to timing and wirelength of the design."
             "This value can be >=0, where 0 would mean the placement is based solely on timing and wirelength, a value of 1 would mean noc placement is considered equal to timing and wirelength and a value greater than 1 would mean the placement is increasingly dominated by NoC parameters.")
-        .default_value("0.6")
+        .default_value("5.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_latency_constraints_weighting, "--noc_latency_constraints_weighting")
         .help(
             "Controls the importance of meeting all the NoC traffic flow latency constraints."
             "This value can be >=0, where 0 would mean the latency constraints have no relevance to placement, a value of 1 would mean the latency constraints are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by meeting the latency constraints of the traffic flows.")
-        .default_value("1")
+        .default_value("0.6")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_latency_weighting, "--noc_latency_weighting")
         .help(
             "Controls the importance of reducing the latencies of the NoC traffic flows."
             "This value can be >=0, where 0 would mean the latencies have no relevance to placement, a value of 1 would mean the latencies  are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the latencies of the traffic flows.")
-        .default_value("0.05")
+        .default_value("0.02")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_congestion_weighting, "--noc_congestion_weighting")

From 619d9e7d3b47e26ca8ab38063785b4502033b319 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 12 Feb 2024 15:30:55 -0500
Subject: [PATCH 38/41] removed unused arguments

---
 vpr/src/place/initial_noc_placement.cpp | 2 +-
 vpr/src/place/noc_place_checkpoint.cpp  | 4 ++--
 vpr/src/place/noc_place_checkpoint.h    | 3 +--
 vpr/src/place/noc_place_utils.cpp       | 2 +-
 vpr/src/place/noc_place_utils.h         | 3 +--
 vpr/src/place/place_checkpoint.cpp      | 2 +-
 6 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp
index f3298062886..9294f3b291b 100644
--- a/vpr/src/place/initial_noc_placement.cpp
+++ b/vpr/src/place/initial_noc_placement.cpp
@@ -240,7 +240,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
     }
 
     if (checkpoint.get_cost() < costs.cost) {
-        checkpoint.restore_checkpoint(noc_opts, costs);
+        checkpoint.restore_checkpoint(costs);
     }
 }
 
diff --git a/vpr/src/place/noc_place_checkpoint.cpp b/vpr/src/place/noc_place_checkpoint.cpp
index a25cd9ec82c..e0f41dc94f0 100644
--- a/vpr/src/place/noc_place_checkpoint.cpp
+++ b/vpr/src/place/noc_place_checkpoint.cpp
@@ -32,7 +32,7 @@ void NoCPlacementCheckpoint::save_checkpoint(double cost) {
     cost_ = cost;
 }
 
-void NoCPlacementCheckpoint::restore_checkpoint(const t_noc_opts& noc_opts, t_placer_costs& costs) {
+void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs) {
     const auto& noc_ctx = g_vpr_ctx.noc();
     const auto& device_ctx = g_vpr_ctx.device();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -68,7 +68,7 @@ void NoCPlacementCheckpoint::restore_checkpoint(const t_noc_opts& noc_opts, t_pl
     }
 
     // Re-initialize routes and static variables that keep track of NoC-related costs
-    reinitialize_noc_routing(noc_opts, costs);
+    reinitialize_noc_routing(costs);
 }
 
 bool NoCPlacementCheckpoint::is_valid() const {
diff --git a/vpr/src/place/noc_place_checkpoint.h b/vpr/src/place/noc_place_checkpoint.h
index bf5c4305616..11df0a50732 100644
--- a/vpr/src/place/noc_place_checkpoint.h
+++ b/vpr/src/place/noc_place_checkpoint.h
@@ -43,10 +43,9 @@ class NoCPlacementCheckpoint {
     /**
      * @brief Loads the save checkpoint into global placement data structues.
      *
-     *  @param noc_opts: Contains weighting factors for different NoC cost terms
      *  @param costs: Used to load NoC related costs for the checkpoint
      */
-    void restore_checkpoint(const t_noc_opts& noc_opts, t_placer_costs& costs);
+    void restore_checkpoint(t_placer_costs& costs);
 
     /**
      * @brief Indicates whether the object is empty or it has already stored a
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 0afc87e57f8..2eedef335a1 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -66,7 +66,7 @@ void initial_noc_routing(void) {
     return;
 }
 
-void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs) {
+void reinitialize_noc_routing(t_placer_costs& costs) {
     // used to access NoC links and modify them
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index db041b59cb0..c97894d126d 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -70,10 +70,9 @@ void initial_noc_routing(void);
  * traffic flow routes, and static variable in noc_place_utils.cpp are no
  * longer valid and need to be re-initialized.
  *
- * @param noc_opts NoC-related options used to calculated NoC costs
  * @param costs Used to get aggregate bandwidth and latency costs.
  */
-void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs);
+void reinitialize_noc_routing(t_placer_costs& costs);
 
 /**
  * @brief Goes through all the cluster blocks that were moved
diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp
index dd9b9a0d9f1..73d49e6e80c 100644
--- a/vpr/src/place/place_checkpoint.cpp
+++ b/vpr/src/place/place_checkpoint.cpp
@@ -63,7 +63,7 @@ void restore_best_placement(t_placement_checkpoint& placement_checkpoint,
          * and need to be re-computed from scratch.
          */
         if (noc_opts.noc) {
-            reinitialize_noc_routing(noc_opts, costs);
+            reinitialize_noc_routing(costs);
         }
 
         VTR_LOG("\nCheckpoint restored\n");

From a92ba80ff964d73e78bb0380430252e1e2253f26 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 13 Feb 2024 16:12:43 -0500
Subject: [PATCH 39/41] applied PR comments

---
 vpr/src/base/read_options.cpp            | 25 ++++++++++++------
 vpr/src/base/vpr_types.h                 |  6 ++---
 vpr/src/noc/noc_storage.h                |  6 +++--
 vpr/src/place/noc_place_utils.cpp        | 20 +++++++++++----
 vpr/src/place/noc_place_utils.h          | 16 ++++++++++++
 vpr/src/place/place_util.cpp             |  2 +-
 vpr/src/place/place_util.h               | 32 +++++++++++-------------
 vtr_flow/scripts/python_libs/vtr/util.py |  6 +++++
 8 files changed, 78 insertions(+), 35 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 626c9af8735..7765bf5acf6 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2813,28 +2813,39 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
     noc_grp.add_argument<double>(args.noc_placement_weighting, "--noc_placement_weighting")
         .help(
             "Controls the importance of the NoC placement parameters relative to timing and wirelength of the design."
-            "This value can be >=0, where 0 would mean the placement is based solely on timing and wirelength, a value of 1 would mean noc placement is considered equal to timing and wirelength and a value greater than 1 would mean the placement is increasingly dominated by NoC parameters.")
+            "This value can be >=0, where 0 would mean the placement is based solely on timing and wirelength."
+            "A value of 1 would mean noc placement is considered equal to timing and wirelength"
+            "A value greater than 1 would mean the placement is increasingly dominated by NoC parameters.")
         .default_value("5.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_latency_constraints_weighting, "--noc_latency_constraints_weighting")
         .help(
-            "Controls the importance of meeting all the NoC traffic flow latency constraints."
-            "This value can be >=0, where 0 would mean the latency constraints have no relevance to placement, a value of 1 would mean the latency constraints are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by meeting the latency constraints of the traffic flows.")
+            "Controls the importance of meeting all the NoC traffic flow latency constraints.\n"
+            "This value can be >=0, where 0 would mean the latency constraints have no relevance to placement.\n"
+            "Other positive numbers specify the importance of meeting latency constraints to other NoC-related cost terms.\n"
+            "Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and"
+            "only their relative ratios determine the importance of each cost term.")
         .default_value("0.6")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_latency_weighting, "--noc_latency_weighting")
         .help(
-            "Controls the importance of reducing the latencies of the NoC traffic flows."
-            "This value can be >=0, where 0 would mean the latencies have no relevance to placement, a value of 1 would mean the latencies  are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the latencies of the traffic flows.")
+            "Controls the importance of reducing the latencies of the NoC traffic flows.\n"
+            "This value can be >=0, where 0 would mean the latencies have no relevance to placement.\n"
+            "Other positive numbers specify the importance of minimizing aggregate latency to other NoC-related cost terms.\n"
+            "Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and"
+            "only their relative ratios determine the importance of each cost term.")
         .default_value("0.02")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     noc_grp.add_argument<double>(args.noc_congestion_weighting, "--noc_congestion_weighting")
         .help(
-            "Controls the importance of reducing the congestion of the NoC links."
-            "This value can be >=0, where 0 would mean the congestion has no relevance to placement, a value of 1 would mean the congestion is weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the link congestions.")
+            "Controls the importance of reducing the congestion of the NoC links.\n"
+            "This value can be >=0, where 0 would mean the congestion has no relevance to placement.\n"
+            "Other positive numbers specify the importance of minimizing congestion to other NoC-related cost terms.\n"
+            "Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and"
+            "only their relative ratios determine the importance of each cost term.")
         .default_value("0.00")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 78e3c3eb44c..185c1c4229f 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1498,11 +1498,11 @@ struct t_noc_opts {
     std::string noc_flows_file;               ///<name of the file that contains all the traffic flow information to be sent over the NoC in this design
     std::string noc_routing_algorithm;        ///<controls the routing algorithm used to route packets within the NoC
     double noc_placement_weighting;           ///<controls the significance of the NoC placement cost relative to the total placement cost range:[0-inf)
-    double noc_aggregate_bandwidth_weighting;
-    double noc_latency_constraints_weighting; ///<controls the significance of meeting the traffic flow contraints range:[0-inf)
+    double noc_aggregate_bandwidth_weighting; ///<controls the significance of the aggregate used bandwidth relative to the other NoC placement costs range:[0-inf)
+    double noc_latency_constraints_weighting; ///<controls the significance of meeting the traffic flow constraints range:[0-inf)
     double noc_latency_weighting;             ///<controls the significance of the traffic flow latencies relative to the other NoC placement costs range:[0-inf)
     double noc_congestion_weighting;          ///<controls the significance of the link congestions relative to the other NoC placement costs range:[0-inf)
-    int noc_swap_percentage;                  ///<controls the number of NoC router block swap attemps relative to the total number of swaps attempted by the placer range:[0-100]
+    int noc_swap_percentage;                  ///<controls the number of NoC router block swap attempts relative to the total number of swaps attempted by the placer range:[0-100]
     std::string noc_placement_file_name;      ///<is the name of the output file that contains the NoC placement information
 };
 
diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index 4870ea34be3..637d9f52126 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -30,7 +30,7 @@
  * in the NoC. They can be thought of as edges in a graph. Links
  * have a source router where they exit from and sink router where
  * they enter. It is important to note that the links are not
- * bi-directional, the legal way to traverse a link is from the
+ * bi-directional; the legal way to traverse a link is from the
  * source router of the link to the sink router.
  * 
  */
@@ -271,8 +271,10 @@ class NocStorage {
 
     /**
      * @brief Given source and sink router identifiers, this function
-     * finds a link connecting these routers and returns it identifier.
+     * finds a link connecting these routers and returns its identifier.
      * If such a link does not exist, an invalid id is returned.
+     * The function is not optimized for performance as it has a complexity
+     * of O(N_links).
      *
      * @param src_router The unique router identifier for the source router.
      * @param dst_router The unique router identifier for the destination router.
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 2eedef335a1..a228cd1836e 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -86,9 +86,12 @@ void reinitialize_noc_routing(t_placer_costs& costs) {
 
 void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
                                                     NocCostTerms& delta_c) {
+    /* For speed, delta_c is passed by reference instead of being returned.
+     * We expect delta cost terms to be zero to ensure correctness.
+     */
     VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.);
     VTR_ASSERT_SAFE(delta_c.latency == 0.);
-    VTR_ASSERT(delta_c.latency_overrun == 0.);
+    VTR_ASSERT_SAFE(delta_c.latency_overrun == 0.);
     VTR_ASSERT_SAFE(delta_c.congestion == 0.);
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
 
@@ -230,7 +233,7 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id,
             // first check to see whether we have already re-routed the current traffic flow and only re-route it if we haven't already.
             if (updated_traffic_flows.find(traffic_flow_id) == updated_traffic_flows.end()) {
                 // get all links for this flow route before it is rerouted
-                // The returned const std::vector<NocLinkId>& is copied so that we can modify (sort) it
+                // The returned const std::vector<NocLinkId>& is copied so that we can modify (sort) it in find_affected_links_by_flow_reroute()
                 std::vector<NocLinkId> prev_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
 
                 // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers
@@ -603,9 +606,16 @@ double calculate_noc_cost(const NocCostTerms& cost_terms,
     double cost = 0.0;
 
     /* NoC's contribution to the placement cost is a weighted sum over:
-     * 1) Traffic flow latency costs
-     * 2) Traffic flow aggregate bandwidth costs
-     * 3) Link congestion costs
+     * 1) Traffic flow aggregate bandwidth cost
+     * 2) Traffic flow latency cost
+     * 3) Traffic flow latency overrun cost
+     * 4) Link congestion cost
+     *
+     * Since NoC-related cost terms have different scales, they are
+     * rescaled by multiplying each cost term with its corresponding
+     * normalization factor. Then, a weighted sum over normalized cost terms
+     * is computed. Weighting factors determine the contribution of each
+     * normalized term to the sum.
      */
     cost = noc_opts.noc_placement_weighting * (
                cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth * noc_opts.noc_aggregate_bandwidth_weighting +
diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h
index c97894d126d..24926c48925 100644
--- a/vpr/src/place/noc_place_utils.h
+++ b/vpr/src/place/noc_place_utils.h
@@ -35,6 +35,8 @@ constexpr double INVALID_NOC_COST_TERM = -1.0;
  * @brief Each traffic flow cost consists of two components:
  *        1) traffic flow aggregate bandwidth (sum over all used links of the traffic flow bandwidth)
  *        2) traffic flow latency (currently unloaded/best-case latency of the flow)
+ *        3) traffic flow latency overrun (by how much the latency exceeds the
+ *        latency constraint of the traffic flow)
  *        NoC placement code will keep an array-of-struct to easily access each
  *        traffic flow cost.
  */
@@ -378,6 +380,9 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
  * latencies.
  * @param traffic_flow_info Contains the traffic flow priority.
  * @return The computed latency cost terms for the given traffic flow.
+ * The first element is the total latency experienced by the traffic flow.
+ * The second one specifies how much the experienced latency exceeds the
+ * latency constraint set for this traffic flow.
  */
 std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
                                                               const NocStorage& noc_model,
@@ -394,6 +399,15 @@ std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<
  */
 double calculate_link_congestion_cost(const NocLink& link);
 
+/**
+ * @brief The user passes weighting factors for aggregate latency
+ * and latency overrun terms. The weighting factor for aggregate
+ * bandwidth is computed by subtracting the two user-provided weighting
+ * factors from 1. The computed aggregate bandwidth weighting factor
+ * is stored in noc_opts argument.
+ *
+ * @param noc_opts Contains weighting factors.
+ */
 void normalize_noc_cost_weighting_factor(t_noc_opts& noc_opts);
 
 /**
@@ -424,6 +438,8 @@ int get_number_of_traffic_flows_with_latency_cons_met(void);
 
 /**
  * @brief Goes through all NoC links and counts the congested ones.
+ * A congested NoC link is a link whose used bandwidth exceeds its
+ * bandwidth capacity.
  *
  * @return The total number of congested NoC links.
  */
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 7358823d981..3f7856afa88 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -431,7 +431,7 @@ void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vecto
                             continue;
                         }
                         // If this is the anchor position of a block, add it to the legal_pos.
-                        // Otherwise don't, so large blocks aren't added multiple times.
+                        // Otherwise, don't, so large blocks aren't added multiple times.
                         if (device_ctx.grid.get_width_offset({i, j, layer_num}) == 0 && device_ctx.grid.get_height_offset({i, j, layer_num}) == 0) {
                             int itype = tile->index;
                             int isub_tile = sub_tile.index;
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 22415b309d6..e77ee98917d 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -18,12 +18,18 @@ class t_placer_costs;
 
 /**
  * @brief Data structure that stores different cost terms for NoC placement.
+ * This data structure can also be used to store normalization and weighting
+ * factors for NoC-related cost terms.
  *
- *   @param aggregate_bandwidth The total used bandwidth used in the NoC.
- *   @param latency A weighted average between aggregate latency and
- *   latency overruns.
- *   @param congestion The sum of congestion divided by available bandwidth
- *   over all NoC links.
+ *   @param aggregate_bandwidth The aggregate NoC bandwidth cost. This is
+ *   computed by summing all used link bandwidths.
+ *   @param latency The NoC latency cost, calculated as the sum of latencies
+ *   experienced by each traffic flow.
+ *   @param latency_overrun Sum of latency overrun for traffic flows that have
+ *   a latency constraint.
+ *   @param congestion The NoC congestion cost, i.e. how over-utilized
+ *   NoC links are. This is computed by dividing over-utilized bandwidth
+ *   by link bandwidth, and summing all computed ratios.
  */
 struct NocCostTerms {
   public:
@@ -61,22 +67,14 @@ struct NocCostTerms {
  *   @param timing_cost_norm The normalization factor for the timing cost, which
  *              is upper-bounded by the value of MAX_INV_TIMING_COST.
  *
- *   @param noc_aggregate_bandwidth_cost The aggregate NoC bandwidth cost
- *   @param noc_aggregate_bandwidth_cost_norm The normalization factor for
- *   the aggregate bandwidth cost
- *   @param noc_latency_cost The NoC latency cost,
- *   calculated as the sum of latencies experienced by each traffic flow
- *   @param noc_latency_cost_norm The normalization factor for the latency cost
- *   @param noc_congestion_cost The NoC congestion cost, i.e. how over-utilized
- *   NoC links are
- *   @param noc_congestion_cost_norm The normalization factor for the NoC
- *   congestion cost
+ *   @param noc_cost_terms NoC-related cost terms
+ *   @param noc_cost_norm_factors Normalization factors for NoC-related cost terms.
  *
  *   @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity
  *              with very lax timing constraints, which avoids multiplying by a
  *              gigantic timing_cost_norm when auto-normalizing. The exact value
  *              of this cost has relatively little impact, but should be large
- *              enough to not affect the timing costs computatation for normal 
+ *              enough to not affect the timing costs computation for normal
  *              constraints.
  *
  *   @param place_algorithm Determines how the member values are updated upon
@@ -94,7 +92,7 @@ class t_placer_costs {
     NocCostTerms noc_cost_norm_factors;
 
   public: //Constructor
-    t_placer_costs(t_place_algorithm algo)
+    explicit t_placer_costs(t_place_algorithm algo)
         : place_algorithm(algo) {}
     t_placer_costs() = default;
 
diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py
index a1b8c598405..dcb1bc81cdb 100644
--- a/vtr_flow/scripts/python_libs/vtr/util.py
+++ b/vtr_flow/scripts/python_libs/vtr/util.py
@@ -146,6 +146,12 @@ def run_system_command(
         try:
             # Call the command
             stderr = None if self._valgrind else subprocess.STDOUT
+            '''
+            capnproto accesses PWD environment variable to learn about the current working directory.
+            However, subprocess.Popen() changes the working directory without updating this variable.
+            This can cause issues when a VTR task passes router lookahead or RR graph files to VPR.
+            PWD environment variable is updated manually to prevent capnproto from throwing exceptions.  
+            '''
             modified_environ = os.environ.copy()
             modified_environ['PWD'] = str(temp_dir)
             proc = subprocess.Popen(

From b9add7fab6ca50b4b3e7c2cd83c4fb5e98ba4738 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 13 Feb 2024 16:22:21 -0500
Subject: [PATCH 40/41] moved comments from source file to header

---
 vpr/src/place/place_util.cpp | 96 ++----------------------------------
 vpr/src/place/place_util.h   | 92 ++++++++++++++++++++++++++++++++--
 2 files changed, 93 insertions(+), 95 deletions(-)

diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 3f7856afa88..6c7f506ee3e 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -9,17 +9,14 @@
 #include "draw_global.h"
 #include "place_constraints.h"
 
-/* File-scope routines */
-static GridBlock init_grid_blocks();
-
 /**
- * @brief Initialize the placer's block-grid dual direction mapping.
- *
- * Forward direction - block to grid: place_ctx.block_locs.
- * Reverse direction - grid to block: place_ctx.grid_blocks.
+ * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
  *
- * Initialize both of them to empty states.
+ * The container at each grid block location should have a length equal to the
+ * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID.
  */
+static GridBlock init_grid_blocks();
+
 void init_placement_context() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -32,12 +29,6 @@ void init_placement_context() {
     place_ctx.grid_blocks = init_grid_blocks();
 }
 
-/**
- * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
- *
- * The container at each grid block location should have a length equal to the
- * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID.
- */
 static GridBlock init_grid_blocks() {
     auto& device_ctx = g_vpr_ctx.device();
     int num_layers = device_ctx.grid.get_num_layers();
@@ -56,12 +47,6 @@ static GridBlock init_grid_blocks() {
     return grid_blocks;
 }
 
-/**
- * @brief Mutator: updates the norm factors in the outer loop iteration.
- *
- * At each temperature change we update these values to be used
- * for normalizing the trade-off between timing and wirelength (bb)
- */
 void t_placer_costs::update_norm_factors() {
     if (place_algorithm.is_timing_driven()) {
         bb_cost_norm = 1 / bb_cost;
@@ -73,11 +58,6 @@ void t_placer_costs::update_norm_factors() {
     }
 }
 
-/**
- * @brief Accumulates NoC cost difference terms
- *
- * @param noc_delta_cost NoC cost difference if the swap is accepted
- */
 t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
     noc_cost_terms += noc_delta_cost;
 
@@ -116,20 +96,6 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
     UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1);
 }
 
-/**
- * @brief Get the initial limit for inner loop block move attempt limit.
- *
- * There are two ways to scale the move limit.
- * e_place_effort_scaling::CIRCUIT
- *      scales the move limit proportional to num_blocks ^ (4/3)
- * e_place_effort_scaling::DEVICE_CIRCUIT
- *      scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
- *
- * The second method is almost identical to the first one when the device
- * is highly utilized (device_size ~ num_blocks). For low utilization devices
- * (device_size >> num_blocks), the search space is larger, so the second method
- * performs more moves to ensure better optimization.
- */
 int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -153,16 +119,6 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch
     return move_lim;
 }
 
-/**
- * @brief Update the annealing state according to the annealing schedule selected.
- *
- *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
- *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
- *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
- *                See doc/src/vpr/dusty_sa.rst for more details.
- *
- * @return True->continues the annealing. False->exits the annealing.
- */
 bool t_annealing_state::outer_loop_update(float success_rate,
                                           const t_placer_costs& costs,
                                           const t_placer_opts& placer_opts,
@@ -248,33 +204,12 @@ bool t_annealing_state::outer_loop_update(float success_rate,
     return true;
 }
 
-/**
- * @brief Update the range limiter to keep acceptance prob. near 0.44.
- *
- * Use a floating point rlim to allow gradual transitions at low temps.
- * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
- */
 void t_annealing_state::update_rlim(float success_rate) {
     rlim *= (1. - 0.44 + success_rate);
     rlim = std::min(rlim, UPPER_RLIM);
     rlim = std::max(rlim, FINAL_RLIM);
 }
 
-/**
- * @brief Update the criticality exponent.
- *
- * When rlim shrinks towards the FINAL_RLIM value (indicating
- * that we are fine-tuning a more optimized placement), we can
- * focus more on a smaller number of critical connections.
- * To achieve this, we make the crit_exponent sharper, so that
- * critical connections would become more critical than before.
- *
- * We calculate how close rlim is to its final value comparing
- * to its initial value. Then, we apply the same scaling factor
- * on the crit_exponent so that it lands on the suitable value
- * between td_place_exp_first and td_place_exp_last. The scaling
- * factor is calculated and applied linearly.
- */
 void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
     /* If rlim == FINAL_RLIM, then scale == 0. */
     float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
@@ -284,11 +219,6 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
                     + placer_opts.td_place_exp_first;
 }
 
-/**
- * @brief Update the move limit based on the success rate.
- *
- * The value is bounded between 1 and move_lim_max.
- */
 void t_annealing_state::update_move_lim(float success_target, float success_rate) {
     move_lim = move_lim_max * (success_target / success_rate);
     move_lim = std::min(move_lim, move_lim_max);
@@ -330,13 +260,6 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int
     std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
 }
 
-/**
- * @brief Returns the standard deviation of data set x.
- *
- * There are n sample points, sum_x_squared is the summation over n of x^2 and av_x
- * is the average x. All operations are done in double precision, since round off
- * error can be a problem in the initial temp. std_dev calculation for big circuits.
- */
 double get_std_dev(int n, double sum_x_squared, double av_x) {
     double std_dev;
     if (n <= 1) {
@@ -396,15 +319,6 @@ void zero_initialize_grid_blocks() {
     }
 }
 
-/**
- * @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement
- *
- *   @param legal_pos
- *              a lookup of all subtiles by sub_tile type
- *              legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector<t_pl_loc> of all the legal locations 
- *              of the proper tile type and sub_tile type
- *
- */
 void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos) {
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_ctx = g_vpr_ctx.placement();
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index e77ee98917d..12bd6ce745b 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -97,7 +97,19 @@ class t_placer_costs {
     t_placer_costs() = default;
 
   public: //Mutator
+    /**
+    * @brief Mutator: updates the norm factors in the outer loop iteration.
+    *
+    * At each temperature change, we update these values to be used
+    * for normalizing the trade-off between timing and wirelength (bb).
+    */
     void update_norm_factors();
+
+    /**
+    * @brief Accumulates NoC cost difference terms
+    *
+    * @param noc_delta_cost Cost difference for NoC-related cost terms
+    */
     t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost);
 
   private:
@@ -180,14 +192,52 @@ class t_annealing_state {
                       int num_layers);
 
   public: //Mutator
+    /**
+    * @brief Update the annealing state according to the annealing schedule selected.
+    *
+    *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
+    *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
+    *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
+    *                See doc/src/vpr/dusty_sa.rst for more details.
+    *
+    * @return True if the annealing should continue; false if it should exit.
+    */
     bool outer_loop_update(float success_rate,
                            const t_placer_costs& costs,
                            const t_placer_opts& placer_opts,
                            const t_annealing_sched& annealing_sched);
 
   private: //Mutator
+    /**
+    * @brief Update the range limiter to keep acceptance prob. near 0.44.
+    *
+    * Use a floating point rlim to allow gradual transitions at low temps.
+    * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
+    */
     inline void update_rlim(float success_rate);
+
+    /**
+    * @brief Update the criticality exponent.
+    *
+    * When rlim shrinks towards the FINAL_RLIM value (indicating
+    * that we are fine-tuning a more optimized placement), we can
+    * focus more on a smaller number of critical connections.
+    * To achieve this, we make the crit_exponent sharper, so that
+    * critical connections would become more critical than before.
+    *
+    * We calculate how close rlim is to its final value comparing
+    * to its initial value. Then, we apply the same scaling factor
+    * on the crit_exponent so that it lands on the suitable value
+    * between td_place_exp_first and td_place_exp_last. The scaling
+    * factor is calculated and applied linearly.
+    */
     inline void update_crit_exponent(const t_placer_opts& placer_opts);
+
+    /**
+    * @brief Update the move limit based on the success rate.
+    *
+    * The value is bounded between 1 and move_lim_max.
+    */
     inline void update_move_lim(float success_target, float success_rate);
 };
 
@@ -245,13 +295,39 @@ class t_placer_statistics {
     void single_swap_update(const t_placer_costs& costs);
 };
 
-///@brief Initialize the placer's block-grid dual direction mapping.
+/**
+ * @brief Initialize the placer's block-grid dual direction mapping.
+ *
+ * Forward direction - block to grid: place_ctx.block_locs.
+ * Reverse direction - grid to block: place_ctx.grid_blocks.
+ *
+ * Initialize both of them to empty states.
+ */
 void init_placement_context();
 
-///@brief Get the initial limit for inner loop block move attempt limit.
+/**
+ * @brief Get the initial value of the inner loop block move attempt limit.
+ *
+ * There are two ways to scale the move limit.
+ * e_place_effort_scaling::CIRCUIT
+ *      scales the move limit proportional to num_blocks ^ (4/3)
+ * e_place_effort_scaling::DEVICE_CIRCUIT
+ *      scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
+ *
+ * The second method is almost identical to the first one when the device
+ * is highly utilized (device_size ~ num_blocks). For low utilization devices
+ * (device_size >> num_blocks), the search space is larger, so the second method
+ * performs more moves to ensure better optimization.
+ */
 int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched);
 
-///@brief Returns the standard deviation of data set x.
+/**
+ * @brief Returns the standard deviation of data set x.
+ *
+ * There are n sample points, sum_x_squared is the summation over n of x^2 and av_x
+ * is the average x. All operations are done in double precision, since round off
+ * error can be a problem in the initial temp. std_dev calculation for big circuits.
+ */
 double get_std_dev(int n, double sum_x_squared, double av_x);
 
 ///@brief Initialize usage to 0 and blockID to EMPTY_BLOCK_ID for all place_ctx.grid_block locations
@@ -260,7 +336,15 @@ void zero_initialize_grid_blocks();
 ///@brief a utility to calculate grid_blocks given the updated block_locs (used in restore_checkpoint)
 void load_grid_blocks_from_block_locs();
 
-///@brief Builds legal_pos structure. legal_pos[type->index] is an array that gives every legal value of (x,y,z) that can accommodate a block.
+/**
+ * @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement
+ *
+ *   @param legal_pos
+ *              a lookup of all subtiles by sub_tile type
+ *              legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector<t_pl_loc> of all the legal locations
+ *              of the proper tile type and sub_tile type
+ *
+ */
 void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos);
 
 ///@brief Performs error checking to see if location is legal for block type, and sets the location and grid usage of the block if it is legal.

From 581c3a4aad413932e5a92937c92f22e40e30ac56 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 13 Feb 2024 17:49:32 -0500
Subject: [PATCH 41/41] fix pylint errors

---
 vtr_flow/scripts/python_libs/vtr/util.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py
index dcb1bc81cdb..61b52ee991b 100644
--- a/vtr_flow/scripts/python_libs/vtr/util.py
+++ b/vtr_flow/scripts/python_libs/vtr/util.py
@@ -146,14 +146,16 @@ def run_system_command(
         try:
             # Call the command
             stderr = None if self._valgrind else subprocess.STDOUT
-            '''
-            capnproto accesses PWD environment variable to learn about the current working directory.
-            However, subprocess.Popen() changes the working directory without updating this variable.
-            This can cause issues when a VTR task passes router lookahead or RR graph files to VPR.
-            PWD environment variable is updated manually to prevent capnproto from throwing exceptions.  
-            '''
+
+            # capnproto accesses PWD environment variable to learn about
+            # the current working directory. However, subprocess.Popen()
+            # changes the working directory without updating this variable.
+            # This can cause issues when a VTR task passes router lookahead
+            # or RR graph files to VPR. PWD environment variable is updated
+            # manually to prevent capnproto from throwing exceptions.
             modified_environ = os.environ.copy()
             modified_environ['PWD'] = str(temp_dir)
+
             proc = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,  # We grab stdout