From 7ba4dd1aaba4d471faf19402035c468513269219 Mon Sep 17 00:00:00 2001 From: Duck Deux Date: Thu, 15 Aug 2024 08:49:20 -0700 Subject: [PATCH] Add flat version of sync_netlists_to_routing --- utils/fasm/src/main.cpp | 14 +- utils/fasm/test/test_fasm.cpp | 25 +- vpr/src/base/atom_lookup.cpp | 1 - vpr/src/base/vpr_api.cpp | 27 +- vpr/src/base/vpr_context.h | 3 + vpr/src/pack/post_routing_pb_pin_fixup.cpp | 34 +- vpr/src/pack/post_routing_pb_pin_fixup.h | 3 +- .../pack/sync_netlists_to_routing_flat.cpp | 368 ++++++++++++++++++ vpr/src/pack/sync_netlists_to_routing_flat.h | 7 + vpr/src/route/annotate_routing.cpp | 33 +- vpr/src/route/annotate_routing.h | 6 +- vpr/src/route/route_common.cpp | 2 + vpr/src/route/route_utils.h | 11 + 13 files changed, 453 insertions(+), 81 deletions(-) create mode 100644 vpr/src/pack/sync_netlists_to_routing_flat.cpp create mode 100644 vpr/src/pack/sync_netlists_to_routing_flat.h diff --git a/utils/fasm/src/main.cpp b/utils/fasm/src/main.cpp index 61c8a7f44fa..dfd4b0ba319 100644 --- a/utils/fasm/src/main.cpp +++ b/utils/fasm/src/main.cpp @@ -23,6 +23,7 @@ using namespace std; #include "fasm.h" #include "post_routing_pb_pin_fixup.h" +#include "sync_netlists_to_routing_flat.h" /* * Exit codes to signal success/failure to scripts @@ -86,15 +87,7 @@ int main(int argc, const char **argv) { bool is_flat = vpr_setup.RouterOpts.flat_routing; if (flow_succeeded) { if(is_flat) { - sync_netlists_to_routing((const Netlist<>&) g_vpr_ctx.atom().nlist, - g_vpr_ctx.device(), - g_vpr_ctx.mutable_atom(), - g_vpr_ctx.atom().lookup, - g_vpr_ctx.mutable_clustering(), - g_vpr_ctx.placement(), - g_vpr_ctx.routing(), - vpr_setup.PackerOpts.pack_verbosity > 2, - is_flat); + sync_netlists_to_routing_flat(); } else { sync_netlists_to_routing((const Netlist<>&) g_vpr_ctx.clustering().clb_nlist, g_vpr_ctx.device(), @@ -103,8 +96,7 @@ int main(int argc, const char **argv) { g_vpr_ctx.mutable_clustering(), g_vpr_ctx.placement(), g_vpr_ctx.routing(), - vpr_setup.PackerOpts.pack_verbosity > 2, - is_flat); + vpr_setup.PackerOpts.pack_verbosity > 2); } } diff --git a/utils/fasm/test/test_fasm.cpp b/utils/fasm/test/test_fasm.cpp index b700211825f..0d4039511ca 100644 --- a/utils/fasm/test/test_fasm.cpp +++ b/utils/fasm/test/test_fasm.cpp @@ -9,13 +9,15 @@ #include "fasm_utils.h" #include "arch_util.h" #include "rr_graph_writer.h" -#include "post_routing_pb_pin_fixup.h" #include #include #include #include #include +#include "post_routing_pb_pin_fixup.h" +#include "sync_netlists_to_routing_flat.h" + static constexpr const char kArchFile[] = "test_fasm_arch.xml"; static constexpr const char kRrGraphFile[] = "test_fasm_rrgraph.xml"; @@ -327,15 +329,18 @@ TEST_CASE("fasm_integration_test", "[fasm]") { /* Sync netlist to the actual routing (necessary if there are block ports with equivalent pins) */ if (flow_succeeded) { - sync_netlists_to_routing((const Netlist<>&) g_vpr_ctx.clustering().clb_nlist, - g_vpr_ctx.device(), - g_vpr_ctx.mutable_atom(), - g_vpr_ctx.atom().lookup, - g_vpr_ctx.mutable_clustering(), - g_vpr_ctx.placement(), - g_vpr_ctx.routing(), - vpr_setup.PackerOpts.pack_verbosity > 2, - is_flat); + if (is_flat) { + sync_netlists_to_routing_flat(); + } else { + sync_netlists_to_routing((const Netlist<>&) g_vpr_ctx.clustering().clb_nlist, + g_vpr_ctx.device(), + g_vpr_ctx.mutable_atom(), + g_vpr_ctx.atom().lookup, + g_vpr_ctx.mutable_clustering(), + g_vpr_ctx.placement(), + g_vpr_ctx.routing(), + vpr_setup.PackerOpts.pack_verbosity > 2); + } } std::stringstream fasm_string; diff --git 
a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index 8b3a45c0098..8f8195e60e8 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -103,7 +103,6 @@ ClusterNetId AtomLookup::clb_net(const AtomNetId net_id) const { } void AtomLookup::set_atom_clb_net(const AtomNetId net_id, const ClusterNetId clb_net_index) { - VTR_ASSERT(net_id); //If either are invalid remove any mapping if (!net_id && clb_net_index != ClusterNetId::INVALID()) { //Remove diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index fac75c5fbb3..157dd76cfc6 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -83,7 +83,7 @@ #include "arch_util.h" #include "post_routing_pb_pin_fixup.h" - +#include "sync_netlists_to_routing_flat.h" #include "load_flat_place.h" @@ -1434,8 +1434,10 @@ bool vpr_analysis_flow(const Netlist<>& net_list, * - Turn on verbose output when users require verbose output * for packer (default verbosity is set to 2 for compact logs) */ - if (!is_flat) { - if (route_status.success()) { + if (route_status.success()) { + if (is_flat) { + sync_netlists_to_routing_flat(); + } else { sync_netlists_to_routing(net_list, g_vpr_ctx.device(), g_vpr_ctx.mutable_atom(), @@ -1443,18 +1445,17 @@ bool vpr_analysis_flow(const Netlist<>& net_list, g_vpr_ctx.mutable_clustering(), g_vpr_ctx.placement(), g_vpr_ctx.routing(), - vpr_setup.PackerOpts.pack_verbosity > 2, - is_flat); - - std::string post_routing_packing_output_file_name = vpr_setup.PackerOpts.output_file + ".post_routing"; - write_packing_results_to_xml(vpr_setup.PackerOpts.global_clocks, - Arch.architecture_id, - post_routing_packing_output_file_name.c_str()); - } else { - VTR_LOG_WARN("Synchronization between packing and routing results is not applied due to illegal circuit implementation\n"); + vpr_setup.PackerOpts.pack_verbosity > 2); } - VTR_LOG("\n"); + + std::string post_routing_packing_output_file_name = vpr_setup.PackerOpts.output_file + ".post_routing"; + write_packing_results_to_xml(vpr_setup.PackerOpts.global_clocks, + Arch.architecture_id, + post_routing_packing_output_file_name.c_str()); + } else { + VTR_LOG_WARN("Synchronization between packing and routing results is not applied due to illegal circuit implementation\n"); } + VTR_LOG("\n"); vpr_analysis(net_list, vpr_setup, diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 376a5c6e01e..706ae690b62 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -496,6 +496,9 @@ struct RoutingContext : public Context { * @brief User specified routing constraints */ UserRouteConstraints constraints; + + /** Is flat routing enabled? 
*/
+    bool is_flat;
 };

 /**
diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.cpp b/vpr/src/pack/post_routing_pb_pin_fixup.cpp
index ceb9263e12b..69daabf5fec 100644
--- a/vpr/src/pack/post_routing_pb_pin_fixup.cpp
+++ b/vpr/src/pack/post_routing_pb_pin_fixup.cpp
@@ -51,12 +51,11 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li
                                                          const AtomContext& atom_ctx,
                                                          const DeviceContext& device_ctx,
                                                          ClusteringContext& clustering_ctx,
-                                                         const vtr::vector<RRNodeId, ParentNetId>& rr_node_nets,
+                                                         const vtr::vector<RRNodeId, ClusterNetId>& rr_node_nets,
                                                          const t_pl_loc& grid_coord,
                                                          const ClusterBlockId& blk_id,
                                                          size_t& num_mismatches,
-                                                         const bool& verbose,
-                                                         bool is_flat) {
+                                                         const bool& verbose) {
    const int sub_tile_z = grid_coord.sub_tile;
    const int coord_x = grid_coord.x;
    const int coord_y = grid_coord.y;
@@ -210,15 +209,7 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li
            continue;
        }
-        ClusterNetId cluster_equivalent_net_id = ClusterNetId::INVALID();
-        if (is_flat) {
-            cluster_equivalent_net_id = atom_ctx.lookup.clb_net(convert_to_atom_net_id(routing_net_id));
-            if (routing_net_id != ParentNetId::INVALID()) {
-                VTR_ASSERT(cluster_equivalent_net_id != ClusterNetId::INVALID());
-            }
-        } else {
-            cluster_equivalent_net_id = convert_to_cluster_net_id(routing_net_id);
-        }
+        ClusterNetId cluster_equivalent_net_id = convert_to_cluster_net_id(routing_net_id);
        /* If the net from the routing results matches the net from the packing results,
         * nothing to be changed. Move on to the next net.
@@ -1050,8 +1041,7 @@ void sync_netlists_to_routing(const Netlist<>& net_list,
                              ClusteringContext& clustering_ctx,
                              const PlacementContext& placement_ctx,
                              const RoutingContext& routing_ctx,
-                              const bool& verbose,
-                              bool is_flat) {
+                              const bool& verbose) {
    vtr::ScopedStartFinishTimer timer("Synchronize the packed netlist to routing optimization");
    /* Reset the database for post-routing clb net mapping */
@@ -1059,11 +1049,9 @@ void sync_netlists_to_routing(const Netlist<>& net_list,
    clustering_ctx.pre_routing_net_pin_mapping.clear();
    /* Create net-to-rr_node mapping */
-    vtr::vector<RRNodeId, ParentNetId> rr_node_nets = annotate_rr_node_nets(net_list,
+    vtr::vector<RRNodeId, ClusterNetId> rr_node_nets = annotate_rr_node_nets(clustering_ctx,
                                                                             device_ctx,
-                                                                            routing_ctx,
-                                                                            verbose,
-                                                                            is_flat);
+                                                                            verbose);
    IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types);
@@ -1076,12 +1064,7 @@ void sync_netlists_to_routing(const Netlist<>& net_list,
    /* Update the core logic (center blocks of the FPGA) */
    for (const ParentBlockId& blk_id : net_list.blocks()) {
        /* We know the entrance to grid info and mapping results, do the fix-up for this block */
-        ClusterBlockId clb_blk_id;
-        if (is_flat) {
-            clb_blk_id = atom_look_up.atom_clb(convert_to_atom_block_id(blk_id));
-        } else {
-            clb_blk_id = convert_to_cluster_block_id(blk_id);
-        }
+        ClusterBlockId clb_blk_id = convert_to_cluster_block_id(blk_id);
        VTR_ASSERT(clb_blk_id != ClusterBlockId::INVALID());
        if (seen_block_ids.insert(clb_blk_id).second) {
@@ -1093,8 +1076,7 @@ void sync_netlists_to_routing(const Netlist<>& net_list,
                                                      placement_ctx.block_locs[clb_blk_id].loc,
                                                      clb_blk_id,
                                                      num_mismatches,
-                                                      verbose,
-                                                      is_flat);
+                                                      verbose);
            update_cluster_routing_traces_with_post_routing_results(atom_ctx,
                                                                    intra_lb_pb_pin_lookup,
diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.h b/vpr/src/pack/post_routing_pb_pin_fixup.h
index 3ad16c38d0c..bcb731e1bed 100644
--- a/vpr/src/pack/post_routing_pb_pin_fixup.h
+++ b/vpr/src/pack/post_routing_pb_pin_fixup.h
@@ -16,7 +16,6 @@ void sync_netlists_to_routing(const Netlist<>& net_list,
                              ClusteringContext& clustering_ctx,
                              const PlacementContext& placement_ctx,
                              const RoutingContext& routing_ctx,
-                              const bool& verbose,
-                              bool is_flat);
+                              const bool& verbose);
 #endif
diff --git a/vpr/src/pack/sync_netlists_to_routing_flat.cpp b/vpr/src/pack/sync_netlists_to_routing_flat.cpp
new file mode 100644
index 00000000000..ba779c30f56
--- /dev/null
+++ b/vpr/src/pack/sync_netlists_to_routing_flat.cpp
@@ -0,0 +1,368 @@
+/********************************************************************
+ * This file includes functions to fix up the pb pin mapping results
+ * after routing optimization
+ *******************************************************************/
+/* Headers from vtrutil library */
+#include "clustered_netlist_utils.h"
+#include "logic_types.h"
+#include "netlist_fwd.h"
+#include "physical_types.h"
+#include "vtr_time.h"
+#include "vtr_assert.h"
+#include "vtr_log.h"
+
+#include "vpr_error.h"
+#include "vpr_utils.h"
+#include "rr_graph2.h"
+
+#include "annotate_routing.h"
+
+#include "sync_netlists_to_routing_flat.h"
+#include
+#include
+
+#include "describe_rr_node.h"
+
+/* Include global variables of VPR */
+#include "globals.h"
+
+/* Output all intra-cluster connections for a RouteTree */
+void get_intra_cluster_connections(const RouteTree& tree, std::vector<std::pair<RRNodeId, RRNodeId>>& out_connections){
+    auto& rr_graph = g_vpr_ctx.device().rr_graph;
+
+    for(auto& node: tree.all_nodes()){
+        const auto& parent = node.parent();
+        if(!parent) /* Root */
+            continue;
+
+        auto type = rr_graph.node_type(node.inode);
+        auto parent_type = rr_graph.node_type(parent->inode);
+
+        /* Both nodes are IPIN/OPIN: this has to be an intrablock connection */
+        if((type == OPIN || type == IPIN) && (parent_type == OPIN || parent_type == IPIN)){
+            out_connections.push_back({parent->inode, node.inode});
+        }
+    }
+}
+
+/** Rudimentary intra-cluster router between two pb_graph pins.
+ * Easier to use than the packer's router, but it assumes that there is only one path between the provided pins.
+ * Expect this to fail/produce invalid results if that's not the case with your architecture.
+ * Outputs the path to the given pb.
*/
+static void route_intra_cluster_conn(const t_pb_graph_pin* source_pin, const t_pb_graph_pin* sink_pin, AtomNetId net_id, t_pb* out_pb){
+
+    std::unordered_set<const t_pb_graph_pin*> visited;
+    std::deque<const t_pb_graph_pin*> queue;
+    std::unordered_map<const t_pb_graph_pin*, const t_pb_graph_pin*> prev;
+
+    auto& out_pb_routes = out_pb->pb_route;
+
+    queue.push_back(source_pin);
+    prev[source_pin] = NULL;
+
+    while(!queue.empty()){
+        const t_pb_graph_pin* cur_pin = queue.front();
+        //std::cout << "expanding: " << cur_pin->to_string() << "\n";
+        queue.pop_front();
+        if(visited.count(cur_pin))
+            continue;
+        visited.insert(cur_pin);
+
+        /* Backtrack and return */
+        if(cur_pin == sink_pin){
+            break;
+        }
+
+        for(auto& edge: cur_pin->output_edges){
+            VTR_ASSERT(edge->num_output_pins == 1);
+            queue.push_back(edge->output_pins[0]);
+            //std::cout << "pushing back " << edge->output_pins[0]->to_string() << "\n";
+            prev[edge->output_pins[0]] = cur_pin;
+        }
+    }
+
+    VTR_ASSERT_MSG(visited.count(sink_pin), "Couldn't find sink pin");
+
+    /* Collect path: we need to build pb_routes from source to sink */
+    std::vector<const t_pb_graph_pin*> path;
+    const t_pb_graph_pin* cur_pin = sink_pin;
+    while(cur_pin){
+        path.push_back(cur_pin);
+        cur_pin = prev[cur_pin];
+    }
+
+    /* Output the path into out_pb_routes (start from source) */
+    int prev_pin_id = -1;
+    for(auto it = path.rbegin(); it != path.rend(); ++it){
+        cur_pin = *it;
+        int cur_pin_id = cur_pin->pin_count_in_cluster;
+        t_pb_route* cur_pb_route;
+
+        if(out_pb_routes.count(cur_pin_id))
+            cur_pb_route = &out_pb_routes[cur_pin_id];
+        else {
+            t_pb_route pb_route = {
+                net_id,
+                -1,
+                {},
+                cur_pin
+            };
+            out_pb_routes.insert(std::make_pair<>(cur_pin_id, pb_route));
+            cur_pb_route = &out_pb_routes[cur_pin_id];
+        }
+
+        if(prev_pin_id != -1){
+            t_pb_route& prev_pb_route = out_pb_routes[prev_pin_id];
+            prev_pb_route.sink_pb_pin_ids.push_back(cur_pin_id);
+            cur_pb_route->driver_pb_pin_id = prev_pb_route.pb_graph_pin->pin_count_in_cluster;
+        }
+
+        prev_pin_id = cur_pin_id;
+    }
+}
+
+void sync_pb_routes_to_routing(){
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& atom_ctx = g_vpr_ctx.atom();
+    auto& cluster_ctx = g_vpr_ctx.mutable_clustering();
+    auto& placement_ctx = g_vpr_ctx.placement();
+    auto& route_ctx = g_vpr_ctx.routing();
+    auto& rr_graph = device_ctx.rr_graph;
+
+    /* Clear out existing pb_routes: they were made by the intra cluster router and are invalid now */
+    for (ClusterBlockId clb_blk_id : cluster_ctx.clb_nlist.blocks()) {
+        /* Only erase entries which are not associated with a clock net: the router doesn't touch the clock nets
+         * XXX: Assumes --clock_modeling ideal */
+        std::vector<int> pins_to_erase;
+        auto& pb_routes = cluster_ctx.clb_nlist.block_pb(clb_blk_id)->pb_route;
+        for(auto& [pin, pb_route]: pb_routes){
+            if(!route_ctx.is_clock_net[pb_route.atom_net_id])
+                pins_to_erase.push_back(pin);
+        }
+
+        for(int pin: pins_to_erase){
+            pb_routes.erase(pin);
+        }
+    }
+
+    /* Go through each route tree and rebuild the pb_routes */
+    for(ParentNetId net_id: atom_ctx.nlist.nets()){
+        auto& tree = route_ctx.route_trees[net_id];
+        if(!tree)
+            continue; /* No routing at this ParentNetId */
+
+        /* Get all intrablock connections */
+        std::vector<std::pair<RRNodeId, RRNodeId>> conns_to_restore; /* (source, sink) */
+        get_intra_cluster_connections(tree.value(), conns_to_restore);
+
+        /* Restore the connections */
+        for(auto [source_inode, sink_inode]: conns_to_restore){
+            auto physical_tile = device_ctx.grid.get_physical_type({
+                rr_graph.node_xlow(source_inode),
+                rr_graph.node_ylow(source_inode),
+                rr_graph.node_layer(source_inode)
+            });
+            int source_pin = rr_graph.node_pin_num(source_inode);
+            int sink_pin = rr_graph.node_pin_num(sink_inode);
+
+            auto [_, subtile] = get_sub_tile_from_pin_physical_num(physical_tile, source_pin);
+
+            ClusterBlockId clb = placement_ctx.grid_blocks.block_at_location({
+                rr_graph.node_xlow(source_inode),
+                rr_graph.node_ylow(source_inode),
+                subtile,
+                rr_graph.node_layer(source_inode)
+            });
+
+            /* Look up pb graph pins from pb type if pin is not on tile, look up from block otherwise */
+            const t_pb_graph_pin* source_pb_graph_pin, *sink_pb_graph_pin;
+            if(is_pin_on_tile(physical_tile, sink_pin)){
+                sink_pb_graph_pin = get_pb_graph_node_pin_from_block_pin(clb, sink_pin);
+            }else{
+                sink_pb_graph_pin = get_pb_pin_from_pin_physical_num(physical_tile, sink_pin);
+            }
+            if(is_pin_on_tile(physical_tile, source_pin)){
+                source_pb_graph_pin = get_pb_graph_node_pin_from_block_pin(clb, source_pin);
+            }else{
+                source_pb_graph_pin = get_pb_pin_from_pin_physical_num(physical_tile, source_pin);
+            }
+
+            t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb);
+
+            /* Route between the pins */
+            route_intra_cluster_conn(source_pb_graph_pin, sink_pb_graph_pin, convert_to_atom_net_id(net_id), pb);
+        }
+    }
+}
+
+/** Rebuild the ClusterNetId <-> AtomNetId lookup after compressing the ClusterNetlist
+ * Needs the "most recent" ClusterNetIds in atom_ctx.lookup: won't work after invalidating the ClusterNetIds twice */
+inline void rebuild_atom_nets_lookup(ClusteredNetlist::IdRemapper& remapped){
+    auto& atom_ctx = g_vpr_ctx.mutable_atom();
+    auto& atom_lookup = atom_ctx.lookup;
+
+    for(auto parent_net_id: atom_ctx.nlist.nets()){
+        auto atom_net_id = convert_to_atom_net_id(parent_net_id);
+        ClusterNetId old_clb_net = atom_lookup.clb_net(atom_net_id);
+        if(!old_clb_net)
+            continue;
+        ClusterNetId new_clb_net = remapped.new_net_id(old_clb_net);
+        atom_lookup.set_atom_clb_net(atom_net_id, new_clb_net);
+    }
+}
+
+/** Regenerate clustered netlist nets from routing results */
+void sync_clustered_netlist_to_routing(void){
+    auto& cluster_ctx = g_vpr_ctx.mutable_clustering();
+    auto& place_ctx = g_vpr_ctx.mutable_placement();
+    auto& route_ctx = g_vpr_ctx.routing();
+    auto& clb_netlist = cluster_ctx.clb_nlist;
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& rr_graph = device_ctx.rr_graph;
+    auto& atom_ctx = g_vpr_ctx.mutable_atom();
+    auto& atom_lookup = atom_ctx.lookup;
+
+    /* 1. Remove all nets, pins and ports from the clustered netlist (except clocks) */
+    for(auto net_id: clb_netlist.nets()){
+        auto atom_net_id = atom_lookup.atom_net(net_id);
+        if(route_ctx.is_clock_net[atom_net_id])
+            continue;
+
+        clb_netlist.remove_net(net_id);
+        atom_lookup.set_atom_clb_net(AtomNetId::INVALID(), net_id);
+    }
+    for(auto pin_id: clb_netlist.pins()){
+        ClusterNetId clb_net_id = clb_netlist.pin_net(pin_id);
+        auto atom_net_id = atom_lookup.atom_net(clb_net_id);
+        if(atom_net_id && route_ctx.is_clock_net[atom_net_id])
+            continue;
+        clb_netlist.remove_pin(pin_id);
+    }
+    for(auto port_id: clb_netlist.ports()){
+        ClusterNetId clb_net_id = clb_netlist.port_net(port_id, 0);
+        auto atom_net_id = atom_lookup.atom_net(clb_net_id);
+        if(atom_net_id && route_ctx.is_clock_net[atom_net_id])
+            continue;
+        clb_netlist.remove_port(port_id);
+    }
+    /* 2. Reset all internal lookups for netlist */
+    auto remapped = clb_netlist.compress();
+    rebuild_atom_nets_lookup(remapped);
+
+    /* 3. Walk each route tree in the atom netlist. If a node is on the tile, add a ClusterPinId for it.
+     * Add the associated net and port too if they don't exist */
+    for(auto parent_net_id: atom_ctx.nlist.nets()){
+        auto& tree = route_ctx.route_trees[parent_net_id];
+        AtomNetId atom_net_id = convert_to_atom_net_id(parent_net_id);
+
+        ClusterNetId clb_net_id;
+        for(auto& rt_node: tree->all_nodes()){
+            auto node_type = rr_graph.node_type(rt_node.inode);
+            if(node_type != IPIN && node_type != OPIN)
+                continue;
+
+            auto physical_tile = device_ctx.grid.get_physical_type({
+                rr_graph.node_xlow(rt_node.inode),
+                rr_graph.node_ylow(rt_node.inode),
+                rr_graph.node_layer(rt_node.inode)
+            });
+
+            int pin_index = rr_graph.node_pin_num(rt_node.inode);
+
+            auto [_, subtile] = get_sub_tile_from_pin_physical_num(physical_tile, pin_index);
+
+            ClusterBlockId clb = place_ctx.grid_blocks.block_at_location({
+                rr_graph.node_xlow(rt_node.inode),
+                rr_graph.node_ylow(rt_node.inode),
+                subtile,
+                rr_graph.node_layer(rt_node.inode)
+            });
+
+            if(!is_pin_on_tile(physical_tile, pin_index))
+                continue;
+
+            if(!clb_net_id){
+                clb_net_id = clb_netlist.create_net(atom_ctx.nlist.net_name(parent_net_id));
+                atom_lookup.set_atom_clb_net(atom_net_id, clb_net_id);
+            }
+
+            t_pb_graph_pin* pb_graph_pin = get_pb_graph_node_pin_from_block_pin(clb, pin_index);
+
+            ClusterPortId port_id = clb_netlist.find_port(clb, pb_graph_pin->port->name);
+            if(!port_id){
+                PortType port_type;
+                if(pb_graph_pin->port->is_clock)
+                    port_type = PortType::CLOCK;
+                else if(pb_graph_pin->port->type == IN_PORT)
+                    port_type = PortType::INPUT;
+                else if(pb_graph_pin->port->type == OUT_PORT)
+                    port_type = PortType::OUTPUT;
+                else
+                    VTR_ASSERT_MSG(false, "Unsupported port type");
+                port_id = clb_netlist.create_port(clb, pb_graph_pin->port->name, pb_graph_pin->port->num_pins, port_type);
+            }
+            PinType pin_type = node_type == OPIN ? PinType::DRIVER : PinType::SINK;
+
+            ClusterPinId new_pin = clb_netlist.create_pin(port_id, pb_graph_pin->pin_number, clb_net_id, pin_type, pb_graph_pin->pin_count_in_cluster);
+            clb_netlist.set_pin_net(new_pin, pin_type, clb_net_id);
+        }
+    }
+    /* 4. Rebuild internal cluster netlist lookups */
+    remapped = clb_netlist.compress();
+    rebuild_atom_nets_lookup(remapped);
+    /* 5. Rebuild place_ctx.physical_pins lookup
+     * TODO: maybe we don't need this fn and pin_index is enough?
*/
+    place_ctx.physical_pins.clear();
+    for(auto clb: clb_netlist.blocks()){
+        place_sync_external_block_connections(clb);
+    }
+    /* TODO: Remove rr_nodes added by the flat router */
+}
+
+/** Fix up pin rotation maps and the atom pin -> pb graph pin lookup for every block */
+void fixup_atom_pb_graph_pin_mapping(void){
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& atom_ctx = g_vpr_ctx.mutable_atom();
+
+    for(ClusterBlockId clb: cluster_ctx.clb_nlist.blocks()){
+        /* Collect all innermost pb routes */
+        std::vector<int> sink_pb_route_ids;
+        t_pb* clb_pb = cluster_ctx.clb_nlist.block_pb(clb);
+        for(auto [pb_route_id, pb_route]: clb_pb->pb_route){
+            if(pb_route.sink_pb_pin_ids.empty())
+                sink_pb_route_ids.push_back(pb_route_id);
+        }
+
+        for(int sink_pb_route_id: sink_pb_route_ids){
+            t_pb_route& pb_route = clb_pb->pb_route.at(sink_pb_route_id);
+
+            const t_pb_graph_pin* atom_pbg_pin = pb_route.pb_graph_pin;
+            t_pb* atom_pb = clb_pb->find_mutable_pb(atom_pbg_pin->parent_node);
+            AtomBlockId atb = atom_ctx.lookup.pb_atom(atom_pb);
+            if(!atb)
+                continue;
+
+            /* Find atom port from pbg pin's model port */
+            AtomPortId atom_port = atom_ctx.nlist.find_atom_port(atb, atom_pbg_pin->port->model_port);
+            for(AtomPinId atom_pin: atom_ctx.nlist.port_pins(atom_port)){
+                /* Match net IDs from pb_route and atom netlist and connect in lookup */
+                if(pb_route.atom_net_id == atom_ctx.nlist.pin_net(atom_pin)){
+                    atom_ctx.lookup.set_atom_pin_pb_graph_pin(atom_pin, atom_pbg_pin);
+                    atom_pb->set_atom_pin_bit_index(atom_pbg_pin, atom_ctx.nlist.pin_port_bit(atom_pin));
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Regenerate intra-cluster routing in the packer ctx from flat routing results.
+ * This function SHOULD be run ONLY when routing is finished!!!
+ */
+void sync_netlists_to_routing_flat(void) {
+    vtr::ScopedStartFinishTimer timer("Synchronize the packed netlist to routing optimization");
+
+    sync_clustered_netlist_to_routing();
+    sync_pb_routes_to_routing();
+    fixup_atom_pb_graph_pin_mapping();
+}
diff --git a/vpr/src/pack/sync_netlists_to_routing_flat.h b/vpr/src/pack/sync_netlists_to_routing_flat.h
new file mode 100644
index 00000000000..9dad836a88e
--- /dev/null
+++ b/vpr/src/pack/sync_netlists_to_routing_flat.h
@@ -0,0 +1,7 @@
+#include "netlist.h"
+
+/**
+ * Regenerate intra-cluster routing in the packer ctx from flat routing results.
+ * This function SHOULD be run ONLY when routing is finished!!!
+ */ +void sync_netlists_to_routing_flat(void); diff --git a/vpr/src/route/annotate_routing.cpp b/vpr/src/route/annotate_routing.cpp index f290b7ccbf2..111da1c0e07 100644 --- a/vpr/src/route/annotate_routing.cpp +++ b/vpr/src/route/annotate_routing.cpp @@ -3,12 +3,14 @@ * from VPR to OpenFPGA *******************************************************************/ /* Headers from vtrutil library */ +#include "vpr_error.h" #include "vtr_assert.h" #include "vtr_time.h" #include "vtr_log.h" -#include "vpr_error.h" +#include "route_utils.h" #include "rr_graph.h" + #include "annotate_routing.h" /******************************************************************** @@ -18,29 +20,32 @@ * - Mapped nodes should have valid net ids (except SOURCE and SINK nodes) * - Unmapped rr_node will use invalid ids *******************************************************************/ -vtr::vector annotate_rr_node_nets(const Netlist<>& net_list, +vtr::vector annotate_rr_node_nets(const ClusteringContext& cluster_ctx, const DeviceContext& device_ctx, - const RoutingContext& routing_ctx, - const bool& verbose, - bool is_flat) { + const bool& verbose) { size_t counter = 0; vtr::ScopedStartFinishTimer timer("Annotating rr_node with routed nets"); const auto& rr_graph = device_ctx.rr_graph; - vtr::vector rr_node_nets; - rr_node_nets.resize(rr_graph.num_nodes(), ParentNetId::INVALID()); + auto& netlist = cluster_ctx.clb_nlist; + vtr::vector rr_node_nets; + rr_node_nets.resize(rr_graph.num_nodes(), ClusterNetId::INVALID()); - for (auto net_id : net_list.nets()) { - if (net_list.net_is_ignored(net_id)) { + for (auto net_id : netlist.nets()) { + if (netlist.net_is_ignored(net_id)) { continue; } /* Ignore used in local cluster only, reserved one CLB pin */ - if (net_list.net_sinks(net_id).empty()) { + if (netlist.net_sinks(net_id).empty()) { continue; } - for (auto& rt_node : routing_ctx.route_trees[net_id].value().all_nodes()) { + auto& tree = get_route_tree_from_cluster_net_id(net_id); + if(!tree) + continue; + + for (auto& rt_node : tree->all_nodes()) { const RRNodeId rr_node = rt_node.inode; /* Ignore source and sink nodes, they are the common node multiple starting and ending points */ if ((SOURCE != rr_graph.node_type(rr_node)) @@ -56,14 +61,14 @@ vtr::vector annotate_rr_node_nets(const Netlist<>& net_li && (net_id != rr_node_nets[rr_node])) { VPR_FATAL_ERROR(VPR_ERROR_ANALYSIS, "Detect two nets '%s' and '%s' that are mapped to the same rr_node '%ld'!\n%s\n", - net_list.net_name(net_id).c_str(), - net_list.net_name(rr_node_nets[rr_node]).c_str(), + netlist.net_name(net_id).c_str(), + netlist.net_name(rr_node_nets[rr_node]).c_str(), size_t(rr_node), describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, rr_node, - is_flat) + false) .c_str()); } else { rr_node_nets[rr_node] = net_id; diff --git a/vpr/src/route/annotate_routing.h b/vpr/src/route/annotate_routing.h index 97f22f1c3bc..e3f66e6b258 100644 --- a/vpr/src/route/annotate_routing.h +++ b/vpr/src/route/annotate_routing.h @@ -10,10 +10,8 @@ * Function declaration *******************************************************************/ -vtr::vector annotate_rr_node_nets(const Netlist<>& net_list, +vtr::vector annotate_rr_node_nets(const ClusteringContext& cluster_ctx, const DeviceContext& device_ctx, - const RoutingContext& routing_ctx, - const bool& verbose, - bool is_flat); + const bool& verbose); #endif diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 74a84472388..4ffea24a99e 100644 --- a/vpr/src/route/route_common.cpp 
+++ b/vpr/src/route/route_common.cpp
@@ -273,6 +273,8 @@ void init_route_structs(const Netlist<>& net_list,
    route_ctx.clb_opins_used_locally = alloc_and_load_clb_opins_used_locally();
    route_ctx.net_status.resize(net_list.nets().size());
+    route_ctx.is_flat = is_flat;
+
    if (has_choking_point && is_flat) {
        std::tie(route_ctx.net_terminal_groups, route_ctx.net_terminal_group_num) = load_net_terminal_groups(device_ctx.rr_graph,
                                                                                                             net_list,
diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h
index fddad8247dd..3628d5abda9 100644
--- a/vpr/src/route/route_utils.h
+++ b/vpr/src/route/route_utils.h
@@ -70,6 +70,17 @@ void generate_route_timing_reports(const t_router_opts& router_opts,
 /** Get the maximum number of pins used in the netlist (used to allocate things) */
 int get_max_pins_per_net(const Netlist<>& net_list);
+inline const vtr::optional<RouteTree>& get_route_tree_from_cluster_net_id(ClusterNetId net_id){
+    auto& route_ctx = g_vpr_ctx.routing();
+    if(!route_ctx.is_flat){
+        return route_ctx.route_trees[ParentNetId(net_id)];
+    }else{
+        auto& atom_lookup = g_vpr_ctx.atom().lookup;
+        AtomNetId atom_id = atom_lookup.atom_net(net_id);
+        return route_ctx.route_trees[ParentNetId(atom_id)];
+    }
+}
+
 /** Initialize net_delay based on best-case delay estimates from the router lookahead. */
 void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
                                    const Netlist<>& net_list,
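
Usage sketch (illustrative, not part of the patch): the three call sites touched above (utils/fasm/src/main.cpp, utils/fasm/test/test_fasm.cpp, and vpr_analysis_flow in vpr/src/base/vpr_api.cpp) all dispatch on is_flat the same way. The hypothetical helper below just restates that contract in one place; sync_after_routing and its parameters are placeholders, while the two sync routines and the g_vpr_ctx accessors are the ones used in the patch.

#include "post_routing_pb_pin_fixup.h"
#include "sync_netlists_to_routing_flat.h"
#include "globals.h"

// Hypothetical wrapper (not in the patch): pick the sync routine that matches how
// routing was run. The flat variant reads everything it needs from g_vpr_ctx, so it
// takes no arguments; the two-stage variant still receives the contexts explicitly.
// In the non-flat flow, net_list is the clustered netlist cast to Netlist<> (see main.cpp).
static void sync_after_routing(const Netlist<>& net_list, const t_vpr_setup& vpr_setup, bool is_flat) {
    if (is_flat) {
        sync_netlists_to_routing_flat();
    } else {
        sync_netlists_to_routing(net_list,
                                 g_vpr_ctx.device(),
                                 g_vpr_ctx.mutable_atom(),
                                 g_vpr_ctx.atom().lookup,
                                 g_vpr_ctx.mutable_clustering(),
                                 g_vpr_ctx.placement(),
                                 g_vpr_ctx.routing(),
                                 vpr_setup.PackerOpts.pack_verbosity > 2);
    }
}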