From 3f55019fae3d5cddd55036ab04125a8812c94d88 Mon Sep 17 00:00:00 2001 From: Duck Deux Date: Tue, 10 Sep 2024 10:49:35 -0700 Subject: [PATCH] address review --- libs/libarchfpga/src/physical_types.h | 4 +- vpr/src/pack/post_routing_pb_pin_fixup.cpp | 14 ---- vpr/src/pack/post_routing_pb_pin_fixup.h | 17 ++++ .../pack/sync_netlists_to_routing_flat.cpp | 83 ++++++++++++++----- vpr/src/pack/sync_netlists_to_routing_flat.h | 19 ++++- vpr/src/route/annotate_routing.cpp | 11 +-- vpr/src/route/annotate_routing.h | 10 +-- vpr/src/route/route_utils.h | 2 + 8 files changed, 105 insertions(+), 55 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 917a8b38d62..3fd2834d65c 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1671,8 +1671,8 @@ constexpr std::array SWITCH_T */ constexpr const char* VPR_DELAYLESS_SWITCH_NAME = "__vpr_delayless_switch__"; -/* Internal switch: used by the flat router */ -constexpr const char* VPR_INTERNAL_SWITCH_NAME = "Internal Switch"; +/* An intracluster switch automatically added to the RRG by the flat router. 
*/ +constexpr const char* VPR_INTERNAL_SWITCH_NAME = "__vpr_intra_cluster_switch__"; enum class BufferSize { AUTO, diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.cpp b/vpr/src/pack/post_routing_pb_pin_fixup.cpp index 8e39e8bc280..417f1a84c38 100644 --- a/vpr/src/pack/post_routing_pb_pin_fixup.cpp +++ b/vpr/src/pack/post_routing_pb_pin_fixup.cpp @@ -1019,20 +1019,6 @@ static void update_cluster_routing_traces_with_post_routing_results(AtomContext& pb->pb_route = new_pb_routes; } -/******************************************************************** - * Top-level function to synchronize a packed netlist to routing results - * The problem comes from a mismatch between the packing and routing results - * When there are equivalent input/output for any grids, router will try - * to swap the net mapping among these pins so as to achieve best - * routing optimization. - * However, it will cause the packing results out-of-date as the net mapping - * of each grid remain untouched once packing is done. - * This function aims to fix the mess after routing so that the net mapping - * can be synchronized - * - * Note: - * - This function SHOULD be run ONLY when routing is finished!!! 
- *******************************************************************/ void sync_netlists_to_routing(const Netlist<>& net_list, const DeviceContext& device_ctx, AtomContext& atom_ctx, diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.h b/vpr/src/pack/post_routing_pb_pin_fixup.h index 026bcacb356..2459da04487 100644 --- a/vpr/src/pack/post_routing_pb_pin_fixup.h +++ b/vpr/src/pack/post_routing_pb_pin_fixup.h @@ -9,6 +9,23 @@ /******************************************************************** * Function declaration *******************************************************************/ + +/******************************************************************** + * Top-level function to synchronize a packed netlist to routing results. + * The problem comes from a mismatch between the packing and routing results. + * When there are equivalent inputs/outputs for any grid, the router will try + * to swap the net mapping among these pins so as to achieve the best + * routing optimization. + * However, this leaves the packing results out of date, as the net mapping + * of each grid remains untouched once packing is done. + * This function aims to fix the mess after routing so that the net mapping + * is synchronized. + * + * Note: + * - This function SHOULD be run ONLY when routing is finished. + * - This function only handles the two-stage routing results. + * \see sync_netlists_to_routing_flat() for the flat routing case. 
+ *******************************************************************/ void sync_netlists_to_routing(const Netlist<>& net_list, const DeviceContext& device_ctx, AtomContext& atom_ctx, diff --git a/vpr/src/pack/sync_netlists_to_routing_flat.cpp b/vpr/src/pack/sync_netlists_to_routing_flat.cpp index aa55239a1dc..16d5c9de113 100644 --- a/vpr/src/pack/sync_netlists_to_routing_flat.cpp +++ b/vpr/src/pack/sync_netlists_to_routing_flat.cpp @@ -1,8 +1,9 @@ -/******************************************************************** - * This file includes functions to fix up the pb pin mapping results - * after routing optimization - *******************************************************************/ -/* Headers from vtrutil library */ +/** +* @file sync_netlists_to_routing_flat.cpp +* +* @brief Implementation for \see sync_netlists_to_routing_flat(). +*/ + #include "clustered_netlist_fwd.h" #include "clustered_netlist_utils.h" #include "logic_types.h" @@ -20,12 +21,50 @@ #include "sync_netlists_to_routing_flat.h" +/* Static function decls (file-scope) */ + +/** Get intra-cluster connections from a given RouteTree. Output pairs to \p out_connections . */ static void get_intra_cluster_connections(const RouteTree& tree, std::vector>& out_connections); + +/** Rudimentary intra-cluster router between two pb_graph pins. + * This is needed because the flat router compresses the RRG reducing singular paths into nodes. + * We need to unpack it to get valid packing results, which is the purpose of this simple BFS router. + * Outputs the path to the pb_routes field of \p out_pb . */ static void route_intra_cluster_conn(const t_pb_graph_pin* source_pin, const t_pb_graph_pin* sink_pin, AtomNetId net_id, t_pb* out_pb); + +/** Rebuild the pb.pb_routes struct for each cluster block from flat routing results. + * The pb_routes struct holds all intra-cluster routing. 
*/ static void sync_pb_routes_to_routing(void); + +/** Rebuild ClusteredNetlist from flat routing results, since some nets can move in/out of a block after routing. */ static void sync_clustered_netlist_to_routing(void); + +/** Rebuild atom_lookup.atom_pin_pb_graph_pin and pb.atom_pin_bit_index from flat routing results. + * These contain mappings between the AtomNetlist and the physical pins, which are invalidated after flat routing due to changed pin rotations. + * (i.e. the primitive has equivalent input pins and flat routing used a different pin) */ static void fixup_atom_pb_graph_pin_mapping(void); + +/* Function definitions */ + +/** Is the clock net found in the routing results? + * (If not, clock_modeling is probably ideal and we should preserve clock routing while rebuilding.) */ +inline bool is_clock_net_routed(void){ + auto& atom_ctx = g_vpr_ctx.atom(); + auto& route_ctx = g_vpr_ctx.routing(); + + for(auto net_id: atom_ctx.nlist.nets()){ + auto& tree = route_ctx.route_trees[net_id]; + if(!tree) + continue; + if(route_ctx.is_clock_net[net_id]) /* Clock net has routing */ + return true; + } + + return false; +} + +/** Get the ClusterBlockId for a given RRNodeId. 
*/ inline ClusterBlockId get_cluster_block_from_rr_node(RRNodeId inode){ auto& device_ctx = g_vpr_ctx.device(); auto& place_ctx = g_vpr_ctx.placement(); @@ -51,7 +90,6 @@ inline ClusterBlockId get_cluster_block_from_rr_node(RRNodeId inode){ return clb; } -/* Output all intra-cluster connections for a RouteTreeNode */ static void get_intra_cluster_connections(const RouteTree& tree, std::vector>& out_connections){ auto& rr_graph = g_vpr_ctx.device().rr_graph; @@ -73,10 +111,6 @@ static void get_intra_cluster_connections(const RouteTree& tree, std::vector visited; std::deque queue; @@ -117,7 +151,7 @@ static void route_intra_cluster_conn(const t_pb_graph_pin* source_pin, const t_p } path.push_back(source_pin); - /* Output the path into out_pb_routes (start from source) */ + /* Output the path into out_pb, starting from source. This is where the pb_route is updated */ int prev_pin_id = -1; for(auto it = path.rbegin(); it != path.rend(); ++it){ cur_pin = *it; @@ -155,14 +189,17 @@ static void sync_pb_routes_to_routing(void){ auto& route_ctx = g_vpr_ctx.routing(); auto& rr_graph = device_ctx.rr_graph; + /* Was the clock net routed? */ + bool clock_net_is_routed = is_clock_net_routed(); + /* Clear out existing pb_routes: they were made by the intra cluster router and are invalid now */ for (ClusterBlockId clb_blk_id : cluster_ctx.clb_nlist.blocks()) { - /* Only erase entries which are not associated with a clock net: the router doesn't touch the clock nets - * XXX: Assumes --clock_modeling ideal */ + /* If we don't have routing for the clock net, don't erase entries associated with a clock net. 
+ * Otherwise we won't have data to rebuild them */ std::vector pins_to_erase; auto& pb_routes = cluster_ctx.clb_nlist.block_pb(clb_blk_id)->pb_route; for(auto& [pin, pb_route]: pb_routes){ - if(!route_ctx.is_clock_net[pb_route.atom_net_id]) + if(clock_net_is_routed || !route_ctx.is_clock_net[pb_route.atom_net_id]) pins_to_erase.push_back(pin); } @@ -221,8 +258,9 @@ static void sync_pb_routes_to_routing(void){ } } -/** Rebuild the ClusterNetId <-> AtomNetId lookup after compressing the ClusterNetlist - * Needs the "most recent" ClusterNetIds in atom_ctx.lookup: won't work after invalidating the ClusterNetIds twice */ +/** Rebuild the ClusterNetId <-> AtomNetId lookup after compressing the ClusterNetlist. + * Needs the old ClusterNetIds in atom_ctx.lookup. Won't work after calling compress() twice, + * since we won't have access to the old IDs in the IdRemapper anywhere. */ inline void rebuild_atom_nets_lookup(ClusteredNetlist::IdRemapper& remapped){ auto& atom_ctx = g_vpr_ctx.mutable_atom(); auto& atom_lookup = atom_ctx.lookup; @@ -248,10 +286,14 @@ static void sync_clustered_netlist_to_routing(void){ auto& atom_ctx = g_vpr_ctx.mutable_atom(); auto& atom_lookup = atom_ctx.lookup; - /* 1. Remove all nets, pins and ports from the clustered netlist (except clocks) */ + bool clock_net_is_routed = is_clock_net_routed(); + + /* 1. Remove all nets, pins and ports from the clustered netlist. + * If the clock net is not routed, don't remove entries for the clock net + * otherwise we won't have data to rebuild them. 
*/ for(auto net_id: clb_netlist.nets()){ auto atom_net_id = atom_lookup.atom_net(net_id); - if(route_ctx.is_clock_net[atom_net_id]) + if(!clock_net_is_routed && route_ctx.is_clock_net[atom_net_id]) continue; clb_netlist.remove_net(net_id); @@ -260,14 +302,14 @@ static void sync_clustered_netlist_to_routing(void){ for(auto pin_id: clb_netlist.pins()){ ClusterNetId clb_net_id = clb_netlist.pin_net(pin_id); auto atom_net_id = atom_lookup.atom_net(clb_net_id); - if(atom_net_id && route_ctx.is_clock_net[atom_net_id]) + if(!clock_net_is_routed && atom_net_id && route_ctx.is_clock_net[atom_net_id]) continue; clb_netlist.remove_pin(pin_id); } for(auto port_id: clb_netlist.ports()){ ClusterNetId clb_net_id = clb_netlist.port_net(port_id, 0); auto atom_net_id = atom_lookup.atom_net(clb_net_id); - if(atom_net_id && route_ctx.is_clock_net[atom_net_id]) + if(!clock_net_is_routed && atom_net_id && route_ctx.is_clock_net[atom_net_id]) continue; clb_netlist.remove_port(port_id); } @@ -346,7 +388,6 @@ static void sync_clustered_netlist_to_routing(void){ } } -/** Fix up pin rotation maps and the atom pin -> pb graph pin lookup for every block */ static void fixup_atom_pb_graph_pin_mapping(void){ auto& cluster_ctx = g_vpr_ctx.clustering(); auto& atom_ctx = g_vpr_ctx.mutable_atom(); diff --git a/vpr/src/pack/sync_netlists_to_routing_flat.h b/vpr/src/pack/sync_netlists_to_routing_flat.h index 9dad836a88e..9403eb1c2a9 100644 --- a/vpr/src/pack/sync_netlists_to_routing_flat.h +++ b/vpr/src/pack/sync_netlists_to_routing_flat.h @@ -1,7 +1,18 @@ #include "netlist.h" -/** - * Regenerate intra-cluster routing in the packer ctx from flat routing results. - * This function SHOULD be run ONLY when routing is finished!!! - */ + /******************************************************************** + * Top-level function to synchronize packing results to routing results. 
+ * Flat routing invalidates the ClusteredNetlist, since nets may be routed + * inside or outside a block, and it changes virtually all intrablock routing. + * This function: + * - rebuilds ClusteredNetlist + * - rebuilds all pb_routes + * - rebuilds pb graph <-> atom pin mapping in AtomLookup + * taking routing results as the source of truth. + * + * Note: + * - This function SHOULD be run ONLY when routing is finished. + * - This function only handles the flat routing results. + * \see sync_netlists_to_routing() for the two-stage case. + *******************************************************************/ void sync_netlists_to_routing_flat(void); diff --git a/vpr/src/route/annotate_routing.cpp b/vpr/src/route/annotate_routing.cpp index 111da1c0e07..42b798d4d02 100644 --- a/vpr/src/route/annotate_routing.cpp +++ b/vpr/src/route/annotate_routing.cpp @@ -1,8 +1,8 @@ /******************************************************************** * This file includes functions that are used to annotate routing results - * from VPR to OpenFPGA + * from VPR to OpenFPGA. (i.e. 
create a mapping from RRNodeIds to ClusterNetIds) *******************************************************************/ -/* Headers from vtrutil library */ + #include "vpr_error.h" #include "vtr_assert.h" #include "vtr_time.h" @@ -13,13 +13,6 @@ #include "annotate_routing.h" -/******************************************************************** - * Create a mapping between each rr_node and its mapped nets - * based on VPR routing results - * - Store the net ids mapped to each routing resource nodes - * - Mapped nodes should have valid net ids (except SOURCE and SINK nodes) - * - Unmapped rr_node will use invalid ids - *******************************************************************/ vtr::vector annotate_rr_node_nets(const ClusteringContext& cluster_ctx, const DeviceContext& device_ctx, const bool& verbose) { diff --git a/vpr/src/route/annotate_routing.h b/vpr/src/route/annotate_routing.h index e3f66e6b258..cf548e1e0fe 100644 --- a/vpr/src/route/annotate_routing.h +++ b/vpr/src/route/annotate_routing.h @@ -1,15 +1,15 @@ #ifndef ANNOTATE_ROUTING_H #define ANNOTATE_ROUTING_H -/******************************************************************** - * Include header files that are required by function declaration - *******************************************************************/ #include "vpr_context.h" /******************************************************************** - * Function declaration + * Create a mapping between each rr_node and its mapped nets + * based on VPR routing results. 
+ * - Store the net ids mapped to each routing resource node + * - Mapped nodes should have valid net ids (except SOURCE and SINK nodes) + * - Unmapped rr_nodes will use invalid ids *******************************************************************/ - vtr::vector annotate_rr_node_nets(const ClusteringContext& cluster_ctx, const DeviceContext& device_ctx, const bool& verbose); diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h index 3628d5abda9..9148180a94c 100644 --- a/vpr/src/route/route_utils.h +++ b/vpr/src/route/route_utils.h @@ -70,6 +70,8 @@ void generate_route_timing_reports(const t_router_opts& router_opts, /** Get the maximum number of pins used in the netlist (used to allocate things) */ int get_max_pins_per_net(const Netlist<>& net_list); +/** Get the RouteTree associated with the ClusterNetId. + * Flat routing maps AtomNetIds to RouteTrees instead, so we need to first look up the associated AtomNetId. */ inline const vtr::optional& get_route_tree_from_cluster_net_id(ClusterNetId net_id){ auto& route_ctx = g_vpr_ctx.routing(); if(!route_ctx.is_flat){