From 8f90f265ff66ffb91ba008cb38cd44fcdb11155f Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 23 Jan 2020 16:38:51 -0800 Subject: [PATCH 1/7] Move call location of ClockRRGraphBuilder and use alloc_and_load_edges. By moving ClockRRGraphBuilder earlier in the rr graph flow, several parts of ClockRRGraphBuilder::create_and_append_clock_rr_graph can be avoided, as they were duplicating work that the original build_rr_graph flow was already doing (init_fan, mapping arch switch to rr switch, partition_edges). This new code should also fully preallocate the rr_node array, though this is not required by the code. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/clock_connection_builders.cpp | 42 +++-- vpr/src/route/clock_connection_builders.h | 13 +- vpr/src/route/clock_network_builders.cpp | 164 ++++++++++++++++---- vpr/src/route/clock_network_builders.h | 42 +++-- vpr/src/route/route_timing.cpp | 2 +- vpr/src/route/rr_graph.cpp | 43 +++-- vpr/src/route/rr_graph2.h | 1 + vpr/src/route/rr_graph_clock.cpp | 119 ++++---------- vpr/src/route/rr_graph_clock.h | 46 +++--- 9 files changed, 275 insertions(+), 197 deletions(-) diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index e8fca69771b..86bdf552c44 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -39,18 +39,24 @@ void RoutingToClockConnection::set_fc_val(float fc_val) { * RoutingToClockConnection (member functions) */ -void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_graph) { +size_t RoutingToClockConnection::estimate_additional_nodes() { + return 1; +} + +void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) { // Initialize random seed // Must be done durring every call inorder for restored rr_graphs after a binary 
// search to be consistant std::srand(seed); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_nodes = device_ctx.rr_nodes; + auto& device_ctx = g_vpr_ctx.device(); auto& rr_node_indices = device_ctx.rr_node_indices; int virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.x, switch_location.y); - device_ctx.virtual_clock_network_root_idx = virtual_clock_network_root_idx; + { + auto& mut_device_ctx = g_vpr_ctx.mutable_device(); + mut_device_ctx.virtual_clock_network_root_idx = virtual_clock_network_root_idx; + } // rr_node indices for x and y channel routing wires and clock wires to connect to auto x_wire_indices = get_rr_node_chan_wires_at_location( @@ -68,18 +74,18 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ // Connect to x-channel wires unsigned num_wires_x = x_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_x; i++) { - rr_nodes[x_wire_indices[i]].add_edge(clock_index, rr_switch_idx); + rr_edges_to_create->emplace_back(x_wire_indices[i], clock_index, rr_switch_idx); } // Connect to y-channel wires unsigned num_wires_y = y_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_y; i++) { - rr_nodes[y_wire_indices[i]].add_edge(clock_index, rr_switch_idx); + rr_edges_to_create->emplace_back(y_wire_indices[i], clock_index, rr_switch_idx); } // Connect to virtual clock sink node // used by the two stage router - rr_nodes[clock_index].add_edge(virtual_clock_network_root_idx, rr_switch_idx); + rr_edges_to_create->emplace_back(clock_index, virtual_clock_network_root_idx, rr_switch_idx); } } @@ -134,10 +140,13 @@ void ClockToClockConneciton::set_fc_val(float fc_val) { * ClockToClockConneciton (member functions) */ -void ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_graph) { - auto& device_ctx = g_vpr_ctx.mutable_device(); +size_t ClockToClockConneciton::estimate_additional_nodes() { + return 0; +} + +void 
ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) { + auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - auto& rr_nodes = device_ctx.rr_nodes; auto to_locations = clock_graph.get_switch_locations(to_clock, to_switch); @@ -179,7 +188,7 @@ void ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_gr if (from_itter == from_rr_node_indices.end()) { from_itter = from_rr_node_indices.begin(); } - rr_nodes[*from_itter].add_edge(to_index, rr_switch_idx); + rr_edges_to_create->emplace_back(*from_itter, to_index, rr_switch_idx); from_itter++; } } @@ -211,9 +220,12 @@ void ClockToPinsConnection::set_fc_val(float fc_val) { * ClockToPinsConnection (member functions) */ -void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_graph) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_nodes = device_ctx.rr_nodes; +size_t ClockToPinsConnection::estimate_additional_nodes() { + return 0; +} + +void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) { + auto& device_ctx = g_vpr_ctx.device(); auto& rr_node_indices = device_ctx.rr_node_indices; auto& grid = device_ctx.grid; @@ -290,7 +302,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra //Create edges depending on Fc for (size_t i = 0; i < clock_network_indices.size() * fc; i++) { - rr_nodes[clock_network_indices[i]].add_edge(clock_pin_node_idx, rr_switch_idx); + rr_edges_to_create->emplace_back(clock_network_indices[i], clock_pin_node_idx, rr_switch_idx); } } } diff --git a/vpr/src/route/clock_connection_builders.h b/vpr/src/route/clock_connection_builders.h index 8076907b656..f040e1c8062 100644 --- a/vpr/src/route/clock_connection_builders.h +++ b/vpr/src/route/clock_connection_builders.h @@ -5,6 +5,7 @@ #include "clock_fwd.h" +#include "rr_graph2.h" #include "rr_graph_clock.h" class 
ClockRRGraphBuilder; @@ -26,7 +27,8 @@ class ClockConnection { /* * Member functions */ - virtual void create_switches(const ClockRRGraphBuilder& clock_graph) = 0; + virtual void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) = 0; + virtual size_t estimate_additional_nodes() = 0; }; class RoutingToClockConnection : public ClockConnection { @@ -53,7 +55,8 @@ class RoutingToClockConnection : public ClockConnection { * Member functions */ /* Connects the inter-block routing to the clock source at the specified coordinates */ - void create_switches(const ClockRRGraphBuilder& clock_graph); + void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) override; + size_t estimate_additional_nodes() override; int create_virtual_clock_network_sink_node(int x, int y); }; @@ -81,7 +84,8 @@ class ClockToClockConneciton : public ClockConnection { * Member functions */ /* Connects a clock tap to a clock source */ - void create_switches(const ClockRRGraphBuilder& clock_graph); + void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) override; + size_t estimate_additional_nodes() override; }; /* This class currently only supports Clock Network to clock pin connection. 
@@ -106,7 +110,8 @@ class ClockToPinsConnection : public ClockConnection { * Member functions */ /* Connects the clock tap to block pins */ - void create_switches(const ClockRRGraphBuilder& clock_graph); + void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) override; + size_t estimate_additional_nodes() override; }; #endif diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index ea367bdbb6b..4fe65c4ed3a 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -61,9 +61,11 @@ void ClockNetwork::set_num_instance(int num_inst) { */ void ClockNetwork::create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, int num_segments) { for (int inst_num = 0; inst_num < get_num_inst(); inst_num++) { - create_rr_nodes_and_internal_edges_for_one_instance(clock_graph, num_segments); + create_rr_nodes_and_internal_edges_for_one_instance(clock_graph, rr_nodes, rr_edges_to_create, num_segments); } } @@ -173,15 +175,58 @@ void ClockRib::create_segments(std::vector& segment_inf) { populate_segment_values(index, name, length, x_chan_wire.layer, segment_inf); } +size_t ClockRib::estimate_additional_nodes() { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + // Avoid an infinite loop + VTR_ASSERT(repeat.y > 0); + VTR_ASSERT(repeat.x > 0); + + size_t num_additional_nodes = 0; + for (unsigned y = x_chan_wire.position; y < grid.height() - 1; y += repeat.y) { + for (unsigned x_start = x_chan_wire.start; x_start < grid.width() - 1; x_start += repeat.x) { + unsigned drive_x = x_start + drive.offset; + unsigned x_end = x_start + x_chan_wire.length; + + // Adjust for boundry conditions + int x_offset = 0; + if ((x_start == 0) || // CHANX wires left boundry + (x_start + repeat.x == x_end)) // Avoid overlap + { + x_offset = 1; + } + if 
(x_end > grid.width() - 2) { + x_end = grid.width() - 2; // CHANX wires right boundry + } + + // Dont create rib if drive point is not reachable + if (drive_x > grid.width() - 2 || drive_x >= x_end || drive_x <= (x_start + x_offset)) { + continue; + } + + // Dont create rib if wire segment is too small + if ((x_start + x_offset) >= x_end) { + continue; + } + + num_additional_nodes += 3; + } + } + + return num_additional_nodes; +} + void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, int num_segments) { // Only chany wires need to know the number of segments inorder // to calculate the cost_index (void)num_segments; - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_nodes = device_ctx.rr_nodes; - auto& grid = device_ctx.grid; + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; int ptc_num = clock_graph.get_and_increment_chanx_ptc_num(); // used for drawing @@ -255,8 +300,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB clock_graph); // connect drive point to each half rib using a directed switch - rr_nodes[drive_node_idx].add_edge(left_node_idx, drive.switch_idx); - rr_nodes[drive_node_idx].add_edge(right_node_idx, drive.switch_idx); + rr_edges_to_create->emplace_back(drive_node_idx, left_node_idx, drive.switch_idx); + rr_edges_to_create->emplace_back(drive_node_idx, right_node_idx, drive.switch_idx); } } } @@ -266,17 +311,18 @@ int ClockRib::create_chanx_wire(int x_start, int y, int ptc_num, e_direction direction, - std::vector& rr_nodes) { - rr_nodes.emplace_back(); - auto node_index = rr_nodes.size() - 1; - - rr_nodes[node_index].set_coordinates(x_start, y, x_end, y); - rr_nodes[node_index].set_type(CHANX); - rr_nodes[node_index].set_capacity(1); - rr_nodes[node_index].set_track_num(ptc_num); - rr_nodes[node_index].set_rc_index(find_create_rr_rc_data( + std::vector* 
rr_nodes) { + rr_nodes->emplace_back(); + auto node_index = rr_nodes->size() - 1; + auto& node = rr_nodes->back(); + + node.set_coordinates(x_start, y, x_end, y); + node.set_type(CHANX); + node.set_capacity(1); + node.set_track_num(ptc_num); + node.set_rc_index(find_create_rr_rc_data( x_chan_wire.layer.r_metal, x_chan_wire.layer.c_metal)); - rr_nodes[node_index].set_direction(direction); + node.set_direction(direction); short seg_index = 0; switch (direction) { @@ -293,7 +339,7 @@ int ClockRib::create_chanx_wire(int x_start, VTR_ASSERT_MSG(false, "Unidentified direction type for clock rib"); break; } - rr_nodes[node_index].set_cost_index(CHANX_COST_INDEX_START + seg_index); // Actual value set later + node.set_cost_index(CHANX_COST_INDEX_START + seg_index); // Actual value set later return node_index; } @@ -419,10 +465,54 @@ void ClockSpine::create_segments(std::vector& segment_inf) { populate_segment_values(index, name, length, y_chan_wire.layer, segment_inf); } +size_t ClockSpine::estimate_additional_nodes() { + size_t num_additional_nodes = 0; + + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + + // Avoid an infinite loop + VTR_ASSERT(repeat.y > 0); + VTR_ASSERT(repeat.x > 0); + + for (unsigned x = y_chan_wire.position; x < grid.width() - 1; x += repeat.x) { + for (unsigned y_start = y_chan_wire.start; y_start < grid.height() - 1; y_start += repeat.y) { + unsigned drive_y = y_start + drive.offset; + unsigned y_end = y_start + y_chan_wire.length; + + // Adjust for boundry conditions + unsigned y_offset = 0; + if ((y_start == 0) || // CHANY wires bottom boundry, start above the LB + (y_start + repeat.y == y_end)) // Avoid overlap + { + y_offset = 1; + } + if (y_end > grid.height() - 2) { + y_end = grid.height() - 2; // CHANY wires top boundry, dont go above the LB + } + + // Dont create spine if drive point is not reachable + if (drive_y > grid.width() - 2 || drive_y >= y_end || drive_y <= (y_start + y_offset)) { + continue; + } + + // 
Dont create spine if wire segment is too small + if ((y_start + y_offset) >= y_end) { + continue; + } + + num_additional_nodes += 3; + } + } + + return num_additional_nodes; +} + void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, int num_segments) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_nodes = device_ctx.rr_nodes; + auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; int ptc_num = clock_graph.get_and_increment_chany_ptc_num(); // used for drawing @@ -503,8 +593,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap clock_graph); // connect drive point to each half spine using a directed switch - rr_nodes[drive_node_idx].add_edge(left_node_idx, drive.switch_idx); - rr_nodes[drive_node_idx].add_edge(right_node_idx, drive.switch_idx); + rr_edges_to_create->emplace_back(drive_node_idx, left_node_idx, drive.switch_idx); + rr_edges_to_create->emplace_back(drive_node_idx, right_node_idx, drive.switch_idx); } } } @@ -514,18 +604,19 @@ int ClockSpine::create_chany_wire(int y_start, int x, int ptc_num, e_direction direction, - std::vector& rr_nodes, + std::vector* rr_nodes, int num_segments) { - rr_nodes.emplace_back(); - auto node_index = rr_nodes.size() - 1; - - rr_nodes[node_index].set_coordinates(x, y_start, x, y_end); - rr_nodes[node_index].set_type(CHANY); - rr_nodes[node_index].set_capacity(1); - rr_nodes[node_index].set_track_num(ptc_num); - rr_nodes[node_index].set_rc_index(find_create_rr_rc_data( + rr_nodes->emplace_back(); + auto node_index = rr_nodes->size() - 1; + auto& node = rr_nodes->back(); + + node.set_coordinates(x, y_start, x, y_end); + node.set_type(CHANY); + node.set_capacity(1); + node.set_track_num(ptc_num); + node.set_rc_index(find_create_rr_rc_data( y_chan_wire.layer.r_metal, y_chan_wire.layer.c_metal)); - rr_nodes[node_index].set_direction(direction); + 
node.set_direction(direction); short seg_index = 0; switch (direction) { @@ -542,7 +633,7 @@ int ClockSpine::create_chany_wire(int y_start, VTR_ASSERT_MSG(false, "Unidentified direction type for clock rib"); break; } - rr_nodes[node_index].set_cost_index(CHANX_COST_INDEX_START + num_segments + seg_index); + node.set_cost_index(CHANX_COST_INDEX_START + num_segments + seg_index); return node_index; } @@ -573,11 +664,20 @@ void ClockHTree::create_segments(std::vector& segment_inf) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "HTrees are not yet supported.\n"); } + +size_t ClockHTree::estimate_additional_nodes() { + return 0; +} + void ClockHTree::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, int num_segments) { //Remove unused parameter warning (void)clock_graph; (void)num_segments; + (void)rr_nodes; + (void)rr_edges_to_create; VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "HTrees are not yet supported.\n"); } diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index c4db346ae00..f6ab7a7dfd4 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -8,6 +8,7 @@ #include "vpr_types.h" +#include "rr_graph2.h" #include "rr_graph_clock.h" class ClockRRGraphBuilder; @@ -101,9 +102,17 @@ class ClockNetwork { /* Creates the RR nodes for the clock network wires and adds them to the reverse lookup * in ClockRRGraphBuilder. 
The reverse lookup maps the nodes to their switch point locations */ void create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, int num_segments); virtual void create_segments(std::vector& segment_inf) = 0; - virtual void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, int num_segments) = 0; + virtual void create_rr_nodes_and_internal_edges_for_one_instance( + ClockRRGraphBuilder& clock_graph, + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, + int num_segments) + = 0; + virtual size_t estimate_additional_nodes() = 0; }; class ClockRib : public ClockNetwork { @@ -134,7 +143,7 @@ class ClockRib : public ClockNetwork { /* * Getters */ - ClockType get_network_type() const; + ClockType get_network_type() const override; /* * Setters @@ -152,15 +161,18 @@ class ClockRib : public ClockNetwork { /* * Member functions */ - void create_segments(std::vector& segment_inf); + void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - int num_segments); + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, + int num_segments) override; + size_t estimate_additional_nodes() override; int create_chanx_wire(int x_start, int x_end, int y, int ptc_num, e_direction direction, - std::vector& rr_nodes); + std::vector* rr_nodes); void record_tap_locations(unsigned x_start, unsigned x_end, unsigned y, @@ -190,7 +202,7 @@ class ClockSpine : public ClockNetwork { /* * Getters */ - ClockType get_network_type() const; + ClockType get_network_type() const override; /* * Setters @@ -208,15 +220,18 @@ class ClockSpine : public ClockNetwork { /* * Member functions */ - void create_segments(std::vector& segment_inf); + void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& 
clock_graph, - int num_segments); + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, + int num_segments) override; + size_t estimate_additional_nodes() override; int create_chany_wire(int y_start, int y_end, int x, int ptc_num, e_direction direction, - std::vector& rr_nodes, + std::vector* rr_nodes, int num_segments); void record_tap_locations(unsigned y_start, unsigned y_end, @@ -238,11 +253,14 @@ class ClockHTree : private ClockNetwork { HtreeTaps tap; public: - ClockType get_network_type() const { return ClockType::H_TREE; } + ClockType get_network_type() const override { return ClockType::H_TREE; } // TODO: Unimplemented member function - void create_segments(std::vector& segment_inf); + void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - int num_segments); + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create, + int num_segments) override; + size_t estimate_additional_nodes() override; }; #endif diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index cc738162be4..4136c7d7b65 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1628,7 +1628,7 @@ static void timing_driven_expand_cheapest(t_heap* cheapest, target_node, router_stats); } else { - //Post-heap prune, do not re-explore from the current/new partial path as it + //Post-heap prune, do not re-explore from the current/new partial path as it //has worse cost than the best partial path to this node found so far VTR_LOGV_DEBUG(f_router_debug, " Worse cost to %d\n", inode); VTR_LOGV_DEBUG(f_router_debug, " Old total cost: %g\n", best_total_cost); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index df1bb8b0967..37890ad9edc 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -148,8 +148,7 @@ static int get_opin_direct_connecions(int x, const int num_directs, const 
t_clb_to_clb_directs* clb_to_clb_directs); -static void alloc_and_load_rr_graph(const int num_nodes, - std::vector& L_rr_node, +static void alloc_and_load_rr_graph(std::vector& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -173,7 +172,8 @@ static void alloc_and_load_rr_graph(const int num_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, - bool is_global_graph); + bool is_global_graph, + const enum e_clock_modeling clock_modeling); static float pattern_fmod(float a, float b); static void load_uniform_connection_block_pattern(vtr::NdMatrix& tracks_connected_to_pin, @@ -296,6 +296,7 @@ static void build_rr_graph(const t_graph_type graph_type, const float R_minW_nmos, const float R_minW_pmos, const enum e_base_cost_type base_cost_type, + const enum e_clock_modeling clock_modeling, const bool trim_empty_channels, const bool trim_obs_channels, const t_direct_inf* directs, @@ -356,19 +357,12 @@ void create_rr_graph(const t_graph_type graph_type, det_routing_arch->R_minW_nmos, det_routing_arch->R_minW_pmos, base_cost_type, + clock_modeling, trim_empty_channels, trim_obs_channels, directs, num_directs, &det_routing_arch->wire_to_rr_ipin_switch, Warnings); - - if (clock_modeling == DEDICATED_NETWORK) { - ClockRRGraphBuilder::create_and_append_clock_rr_graph(segment_inf, - det_routing_arch->R_minW_nmos, - det_routing_arch->R_minW_pmos, - det_routing_arch->wire_to_rr_ipin_switch, - base_cost_type); - } } process_non_config_sets(); @@ -422,6 +416,7 @@ static void build_rr_graph(const t_graph_type graph_type, const float R_minW_nmos, const float R_minW_pmos, const enum e_base_cost_type base_cost_type, + const enum e_clock_modeling clock_modeling, const bool trim_empty_channels, const bool trim_obs_channels, const t_direct_inf* directs, @@ -579,6 +574,9 @@ static void build_rr_graph(const t_graph_type graph_type, device_ctx.rr_node_indices = 
alloc_and_load_rr_node_indices(max_chan_width, grid, &num_rr_nodes, chan_details_x, chan_details_y); + if (clock_modeling == DEDICATED_NETWORK) { + device_ctx.rr_nodes.reserve(num_rr_nodes + ClockRRGraphBuilder::estimate_additional_nodes()); + } device_ctx.rr_nodes.resize(num_rr_nodes); /* These are data structures used by the the unidir opin mapping. They are used @@ -678,7 +676,7 @@ static void build_rr_graph(const t_graph_type graph_type, /* END OPIN MAP */ bool Fc_clipped = false; - alloc_and_load_rr_graph(device_ctx.rr_nodes.size(), device_ctx.rr_nodes, segment_inf.size(), + alloc_and_load_rr_graph(device_ctx.rr_nodes, segment_inf.size(), chan_details_x, chan_details_y, track_to_pin_lookup, opin_to_track_map, switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern, @@ -690,7 +688,8 @@ static void build_rr_graph(const t_graph_type graph_type, directionality, &Fc_clipped, directs, num_directs, clb_to_clb_directs, - is_global_graph); + is_global_graph, + clock_modeling); /* Update rr_nodes capacities if global routing */ if (graph_type == GRAPH_GLOBAL) { @@ -1172,8 +1171,7 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, /* Does the actual work of allocating the rr_graph and filling all the * * appropriate values. Everything up to this was just a prelude! */ -static void alloc_and_load_rr_graph(const int num_nodes, - std::vector& L_rr_node, +static void alloc_and_load_rr_graph(std::vector& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -1197,7 +1195,8 @@ static void alloc_and_load_rr_graph(const int num_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, - bool is_global_graph) { + bool is_global_graph, + const enum e_clock_modeling clock_modeling) { //We take special care when creating RR graph edges (there are typically many more //edges than nodes in an RR graph). 
// @@ -1295,7 +1294,17 @@ static void alloc_and_load_rr_graph(const int num_nodes, } } - init_fan_in(L_rr_node, num_nodes); + if (clock_modeling == DEDICATED_NETWORK) { + ClockRRGraphBuilder::create_and_append_clock_rr_graph( + L_rr_node, + num_seg_types, + rr_edges_to_create); + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(L_rr_node, rr_edges_to_create); + rr_edges_to_create.clear(); + } + + init_fan_in(L_rr_node, L_rr_node.size()); } static void build_bidir_rr_opins(const int i, diff --git a/vpr/src/route/rr_graph2.h b/vpr/src/route/rr_graph2.h index 617730a0df8..e195d13278a 100644 --- a/vpr/src/route/rr_graph2.h +++ b/vpr/src/route/rr_graph2.h @@ -235,6 +235,7 @@ void dump_sblock_pattern(const t_sblock_pattern& sblock_pattern, const char* fname); //Partitions RR graph edges to allow fast access to configurable/non-configurabe edge subsets +struct DeviceContext; void partition_rr_graph_edges(DeviceContext& device_ctx); #endif diff --git a/vpr/src/route/rr_graph_clock.cpp b/vpr/src/route/rr_graph_clock.cpp index 47746c0f883..fd1ec74bbbe 100644 --- a/vpr/src/route/rr_graph_clock.cpp +++ b/vpr/src/route/rr_graph_clock.cpp @@ -9,118 +9,43 @@ #include "vtr_assert.h" #include "vtr_log.h" +#include "vtr_time.h" #include "vpr_error.h" -void ClockRRGraphBuilder::create_and_append_clock_rr_graph(std::vector& segment_inf, - const float R_minW_nmos, - const float R_minW_pmos, - int wire_to_rr_ipin_switch, - const enum e_base_cost_type base_cost_type) { - vtr::printf_info("Starting clock network routing resource graph generation...\n"); - clock_t begin = clock(); +void ClockRRGraphBuilder::create_and_append_clock_rr_graph( + std::vector& L_rr_node, + int num_seg_types, + t_rr_edge_info_set& rr_edges_to_create) { + vtr::ScopedStartFinishTimer timer("Build clock network routing resource graph"); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto* chan_width = &device_ctx.chan_width; + const auto& device_ctx = g_vpr_ctx.device(); auto& clock_networks = 
device_ctx.clock_networks; auto& clock_routing = device_ctx.clock_connections; - size_t clock_nodes_start_idx = device_ctx.rr_nodes.size(); - - ClockRRGraphBuilder clock_graph = ClockRRGraphBuilder(); - clock_graph.create_clock_networks_wires(clock_networks, segment_inf.size()); + ClockRRGraphBuilder clock_graph = ClockRRGraphBuilder(&L_rr_node, &rr_edges_to_create); + clock_graph.create_clock_networks_wires(clock_networks, num_seg_types); clock_graph.create_clock_networks_switches(clock_routing); - - // Reset fanin to account for newly added clock rr_nodes - init_fan_in(device_ctx.rr_nodes, device_ctx.rr_nodes.size()); - - clock_graph.add_rr_switches_and_map_to_nodes(clock_nodes_start_idx, R_minW_nmos, R_minW_pmos); - - // "Partition the rr graph edges for efficient access to configurable/non-configurable - // edge subsets. Must be done after RR switches have been allocated" - partition_rr_graph_edges(device_ctx); - - alloc_and_load_rr_indexed_data(segment_inf, device_ctx.rr_node_indices, - chan_width->max, wire_to_rr_ipin_switch, base_cost_type); - - float elapsed_time = (float)(clock() - begin) / CLOCKS_PER_SEC; - vtr::printf_info("Building clock network resource graph took %g seconds\n", elapsed_time); } // Clock network information comes from the arch file -void ClockRRGraphBuilder::create_clock_networks_wires(std::vector>& clock_networks, +void ClockRRGraphBuilder::create_clock_networks_wires(const std::vector>& clock_networks, int num_segments) { // Add rr_nodes for each clock network wire for (auto& clock_network : clock_networks) { - clock_network->create_rr_nodes_for_clock_network_wires(*this, num_segments); + clock_network->create_rr_nodes_for_clock_network_wires(*this, rr_nodes_, rr_edges_to_create_, num_segments); } // Reduce the capacity of rr_nodes for performance - auto& rr_nodes = g_vpr_ctx.mutable_device().rr_nodes; - rr_nodes.shrink_to_fit(); + rr_nodes_->shrink_to_fit(); } // Clock switch information comes from the arch file -void 
ClockRRGraphBuilder::create_clock_networks_switches(std::vector>& clock_connections) { +void ClockRRGraphBuilder::create_clock_networks_switches(const std::vector>& clock_connections) { for (auto& clock_connection : clock_connections) { - clock_connection->create_switches(*this); + clock_connection->create_switches(*this, rr_edges_to_create_); } } -void ClockRRGraphBuilder::add_rr_switches_and_map_to_nodes(size_t node_start_idx, - const float R_minW_nmos, - const float R_minW_pmos) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_nodes = device_ctx.rr_nodes; - - // Check to see that clock nodes were sucessfully appended to rr_nodes - VTR_ASSERT(rr_nodes.size() > node_start_idx); - - std::unordered_map arch_switch_to_rr_switch; - - // The following assumes that arch_switch was specified earlier when the edges where added - for (size_t node_idx = node_start_idx; node_idx < rr_nodes.size(); node_idx++) { - auto& from_node = rr_nodes[node_idx]; - for (t_edge_size edge_idx = 0; edge_idx < from_node.num_edges(); edge_idx++) { - int arch_switch_idx = from_node.edge_switch(edge_idx); - - int rr_switch_idx; - auto itter = arch_switch_to_rr_switch.find(arch_switch_idx); - if (itter == arch_switch_to_rr_switch.end()) { - rr_switch_idx = add_rr_switch_from_arch_switch_inf(arch_switch_idx, - R_minW_nmos, - R_minW_pmos); - arch_switch_to_rr_switch[arch_switch_idx] = rr_switch_idx; - } else { - rr_switch_idx = itter->second; - } - - from_node.set_edge_switch(edge_idx, rr_switch_idx); - } - } - - device_ctx.rr_switch_inf.shrink_to_fit(); -} - -int ClockRRGraphBuilder::add_rr_switch_from_arch_switch_inf(int arch_switch_idx, - const float R_minW_nmos, - const float R_minW_pmos) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& rr_switch_inf = device_ctx.rr_switch_inf; - auto& arch_switch_inf = device_ctx.arch_switch_inf; - - rr_switch_inf.emplace_back(); - int rr_switch_idx = rr_switch_inf.size() - 1; - - // TODO: Add support for non fixed Tdel based on fanin 
information - // and move assigning Tdel into add_rr_switch - VTR_ASSERT(arch_switch_inf[arch_switch_idx].fixed_Tdel()); - int fanin = UNDEFINED; - - load_rr_switch_from_arch_switch(arch_switch_idx, rr_switch_idx, fanin, R_minW_nmos, R_minW_pmos); - - return rr_switch_idx; -} - void ClockRRGraphBuilder::add_switch_location(std::string clock_name, std::string switch_point_name, int x, @@ -248,3 +173,19 @@ int ClockRRGraphBuilder::get_and_increment_chany_ptc_num() { return ptc_num; } + +size_t ClockRRGraphBuilder::estimate_additional_nodes() { + size_t num_additional_nodes = 0; + + const auto& device_ctx = g_vpr_ctx.device(); + auto& clock_networks = device_ctx.clock_networks; + auto& clock_routing = device_ctx.clock_connections; + for (auto& clock_network : clock_networks) { + num_additional_nodes += clock_network->estimate_additional_nodes(); + } + for (auto& clock_connection : clock_routing) { + num_additional_nodes += clock_connection->estimate_additional_nodes(); + } + + return num_additional_nodes; +} diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index ccab33d8bf4..1e80a2e2b43 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -68,11 +68,18 @@ class ClockRRGraphBuilder { int get_and_increment_chanx_ptc_num(); int get_and_increment_chany_ptc_num(); - public: /* Reverse lookup for to find the clock source and tap locations for each clock_network * The map key is the the clock network name and value are all the switch points*/ std::unordered_map clock_name_to_switch_points; + public: + ClockRRGraphBuilder( + std::vector* rr_nodes, + t_rr_edge_info_set* rr_edges_to_create) + : rr_nodes_(rr_nodes) + , rr_edges_to_create_(rr_edges_to_create) { + } + /* Saves a map from switch rr_node idx -> {x, y} location */ void add_switch_location(std::string clock_name, std::string switch_point_name, @@ -90,41 +97,26 @@ class ClockRRGraphBuilder { std::set> get_switch_locations(std::string clock_name, std::string 
switch_point_name) const; + static size_t estimate_additional_nodes(); + public: /* Creates the routing resourse (rr) graph of the clock network and appends it to the * existing rr graph created in build_rr_graph for inter-block and intra-block routing. */ - static void create_and_append_clock_rr_graph(std::vector& segment_inf, - const float R_minW_nmos, - const float R_minW_pmos, - int wire_to_rr_ipin_switch, - const enum e_base_cost_type base_cost_type); + static void create_and_append_clock_rr_graph( + std::vector& L_rr_node, + int num_seg_types, + t_rr_edge_info_set& rr_edges_to_create); private: /* loop over all of the clock networks and create their wires */ - void create_clock_networks_wires(std::vector>& clock_networks, + void create_clock_networks_wires(const std::vector>& clock_networks, int num_segments); /* loop over all clock routing connections and create the switches and connections */ - void create_clock_networks_switches(std::vector>& clock_connections); - - /* Adds the architecture switches that the clock rr_nodes use to the rr switches and - * maps the newly added rr_switches to the nodes. - * The input nodes_start_idx ~ corresponds to the rr_node index of the first node - * used to create the clock network. Every node from node_start_idx..rr_nodes.size-1 - * is a node in the clock network.*/ - // TODO: Change to account for swtich fanin. Note: this function is simular to - // remap_rr_node_switch_indices but does not take into account node fanin. - void add_rr_switches_and_map_to_nodes(size_t nodes_start_idx, - const float R_minW_nmos, - const float R_minW_pmos); - - /* Returns the index of the newly added rr_switch. The rr_switch information is coppied - * in from the arch_switch information */ - // TODO: Does not account for fanin information when copping Tdel. Note: this function - // is simular to load_rr_switch_inf but does not take into account node fanin. 
- int add_rr_switch_from_arch_switch_inf(int arch_switch_idx, - const float R_minW_nmos, - const float R_minW_pmos); + void create_clock_networks_switches(const std::vector>& clock_connections); + + std::vector* rr_nodes_; + t_rr_edge_info_set* rr_edges_to_create_; }; #endif From bb636d22872da36297b7d8a450db0c6999de8278 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 23 Jan 2020 21:38:04 -0800 Subject: [PATCH 2/7] Delay update to chan_width and avoid uses global context. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/clock_connection_builders.cpp | 5 +- vpr/src/route/clock_network_builders.cpp | 18 +-- vpr/src/route/clock_network_builders.h | 8 +- vpr/src/route/rr_graph.cpp | 151 +++++++++++--------- vpr/src/route/rr_graph_clock.cpp | 66 ++++----- vpr/src/route/rr_graph_clock.h | 38 +++-- 6 files changed, 146 insertions(+), 140 deletions(-) diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index 86bdf552c44..7345cf24c01 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -145,8 +145,7 @@ size_t ClockToClockConneciton::estimate_additional_nodes() { } void ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) { - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; + auto& grid = clock_graph.grid(); auto to_locations = clock_graph.get_switch_locations(to_clock, to_switch); @@ -227,7 +226,7 @@ size_t ClockToPinsConnection::estimate_additional_nodes() { void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_node_indices = device_ctx.rr_node_indices; - auto& grid = device_ctx.grid; + auto& grid = clock_graph.grid(); for (size_t x = 0; x < grid.width(); x++) { for 
(size_t y = 0; y < grid.height(); y++) { diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 4fe65c4ed3a..174b3990ac3 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -175,10 +175,7 @@ void ClockRib::create_segments(std::vector& segment_inf) { populate_segment_values(index, name, length, x_chan_wire.layer, segment_inf); } -size_t ClockRib::estimate_additional_nodes() { - const auto& device_ctx = g_vpr_ctx.device(); - const auto& grid = device_ctx.grid; - +size_t ClockRib::estimate_additional_nodes(const DeviceGrid& grid) { // Avoid an infinite loop VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); @@ -225,8 +222,7 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB // to calculate the cost_index (void)num_segments; - const auto& device_ctx = g_vpr_ctx.device(); - const auto& grid = device_ctx.grid; + const auto& grid = clock_graph.grid(); int ptc_num = clock_graph.get_and_increment_chanx_ptc_num(); // used for drawing @@ -465,12 +461,9 @@ void ClockSpine::create_segments(std::vector& segment_inf) { populate_segment_values(index, name, length, y_chan_wire.layer, segment_inf); } -size_t ClockSpine::estimate_additional_nodes() { +size_t ClockSpine::estimate_additional_nodes(const DeviceGrid& grid) { size_t num_additional_nodes = 0; - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - // Avoid an infinite loop VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); @@ -512,8 +505,7 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap std::vector* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; + auto& grid = clock_graph.grid(); int ptc_num = clock_graph.get_and_increment_chany_ptc_num(); // used for drawing @@ -665,7 +657,7 @@ void ClockHTree::create_segments(std::vector& segment_inf) { 
VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "HTrees are not yet supported.\n"); } -size_t ClockHTree::estimate_additional_nodes() { +size_t ClockHTree::estimate_additional_nodes(const DeviceGrid& /*grid*/) { return 0; } diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index f6ab7a7dfd4..c4caa039b2e 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -112,7 +112,7 @@ class ClockNetwork { t_rr_edge_info_set* rr_edges_to_create, int num_segments) = 0; - virtual size_t estimate_additional_nodes() = 0; + virtual size_t estimate_additional_nodes(const DeviceGrid& grid) = 0; }; class ClockRib : public ClockNetwork { @@ -166,7 +166,7 @@ class ClockRib : public ClockNetwork { std::vector* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; - size_t estimate_additional_nodes() override; + size_t estimate_additional_nodes(const DeviceGrid& grid) override; int create_chanx_wire(int x_start, int x_end, int y, @@ -225,7 +225,7 @@ class ClockSpine : public ClockNetwork { std::vector* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; - size_t estimate_additional_nodes() override; + size_t estimate_additional_nodes(const DeviceGrid& grid) override; int create_chany_wire(int y_start, int y_end, int x, @@ -260,7 +260,7 @@ class ClockHTree : private ClockNetwork { std::vector* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; - size_t estimate_additional_nodes() override; + size_t estimate_additional_nodes(const DeviceGrid& grid) override; }; #endif diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 37890ad9edc..8b51ad486ef 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -148,32 +148,32 @@ static int get_opin_direct_connecions(int x, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs); -static void alloc_and_load_rr_graph(std::vector& L_rr_node, - const 
int num_seg_types, - const t_chan_details& chan_details_x, - const t_chan_details& chan_details_y, - const t_track_to_pin_lookup& track_to_pin_lookup, - const t_pin_to_track_lookup& opin_to_track_map, - const vtr::NdMatrix, 3>& switch_block_conn, - t_sb_connection_map* sb_conn_map, - const DeviceGrid& grid, - const int Fs, - t_sblock_pattern& sblock_pattern, - const std::vector>& Fc_out, - vtr::NdMatrix& Fc_xofs, - vtr::NdMatrix& Fc_yofs, - const t_rr_node_indices& L_rr_node_indices, - const int max_chan_width, - const t_chan_width& chan_width, - const int wire_to_ipin_switch, - const int delayless_switch, - const enum e_directionality directionality, - bool* Fc_clipped, - const t_direct_inf* directs, - const int num_directs, - const t_clb_to_clb_directs* clb_to_clb_directs, - bool is_global_graph, - const enum e_clock_modeling clock_modeling); +static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, + const int num_seg_types, + const t_chan_details& chan_details_x, + const t_chan_details& chan_details_y, + const t_track_to_pin_lookup& track_to_pin_lookup, + const t_pin_to_track_lookup& opin_to_track_map, + const vtr::NdMatrix, 3>& switch_block_conn, + t_sb_connection_map* sb_conn_map, + const DeviceGrid& grid, + const int Fs, + t_sblock_pattern& sblock_pattern, + const std::vector>& Fc_out, + vtr::NdMatrix& Fc_xofs, + vtr::NdMatrix& Fc_yofs, + const t_rr_node_indices& L_rr_node_indices, + const int max_chan_width, + const t_chan_width& chan_width, + const int wire_to_ipin_switch, + const int delayless_switch, + const enum e_directionality directionality, + bool* Fc_clipped, + const t_direct_inf* directs, + const int num_directs, + const t_clb_to_clb_directs* clb_to_clb_directs, + bool is_global_graph, + const enum e_clock_modeling clock_modeling); static float pattern_fmod(float a, float b); static void load_uniform_connection_block_pattern(vtr::NdMatrix& tracks_connected_to_pin, @@ -575,7 +575,7 @@ static void build_rr_graph(const t_graph_type 
graph_type, device_ctx.rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, grid, &num_rr_nodes, chan_details_x, chan_details_y); if (clock_modeling == DEDICATED_NETWORK) { - device_ctx.rr_nodes.reserve(num_rr_nodes + ClockRRGraphBuilder::estimate_additional_nodes()); + device_ctx.rr_nodes.reserve(num_rr_nodes + ClockRRGraphBuilder::estimate_additional_nodes(grid)); } device_ctx.rr_nodes.resize(num_rr_nodes); @@ -676,20 +676,21 @@ static void build_rr_graph(const t_graph_type graph_type, /* END OPIN MAP */ bool Fc_clipped = false; - alloc_and_load_rr_graph(device_ctx.rr_nodes, segment_inf.size(), - chan_details_x, chan_details_y, - track_to_pin_lookup, opin_to_track_map, - switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern, - Fc_out, Fc_xofs, Fc_yofs, device_ctx.rr_node_indices, - max_chan_width, - nodes_per_chan, - wire_to_arch_ipin_switch, - delayless_switch, - directionality, - &Fc_clipped, - directs, num_directs, clb_to_clb_directs, - is_global_graph, - clock_modeling); + auto update_chan_width = alloc_and_load_rr_graph( + device_ctx.rr_nodes, segment_inf.size(), + chan_details_x, chan_details_y, + track_to_pin_lookup, opin_to_track_map, + switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern, + Fc_out, Fc_xofs, Fc_yofs, device_ctx.rr_node_indices, + max_chan_width, + nodes_per_chan, + wire_to_arch_ipin_switch, + delayless_switch, + directionality, + &Fc_clipped, + directs, num_directs, clb_to_clb_directs, + is_global_graph, + clock_modeling); /* Update rr_nodes capacities if global routing */ if (graph_type == GRAPH_GLOBAL) { @@ -705,6 +706,8 @@ static void build_rr_graph(const t_graph_type graph_type, } } + update_chan_width(&nodes_per_chan); + /* Allocate and load routing resource switches, which are derived from the switches from the architecture file, * based on their fanin in the rr graph. 
This routine also adjusts the rr nodes to point to these new rr switches */ alloc_and_load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch); @@ -1171,32 +1174,32 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, /* Does the actual work of allocating the rr_graph and filling all the * * appropriate values. Everything up to this was just a prelude! */ -static void alloc_and_load_rr_graph(std::vector& L_rr_node, - const int num_seg_types, - const t_chan_details& chan_details_x, - const t_chan_details& chan_details_y, - const t_track_to_pin_lookup& track_to_pin_lookup, - const t_pin_to_track_lookup& opin_to_track_map, - const vtr::NdMatrix, 3>& switch_block_conn, - t_sb_connection_map* sb_conn_map, - const DeviceGrid& grid, - const int Fs, - t_sblock_pattern& sblock_pattern, - const std::vector>& Fc_out, - vtr::NdMatrix& Fc_xofs, - vtr::NdMatrix& Fc_yofs, - const t_rr_node_indices& L_rr_node_indices, - const int max_chan_width, - const t_chan_width& chan_width, - const int wire_to_ipin_switch, - const int delayless_switch, - const enum e_directionality directionality, - bool* Fc_clipped, - const t_direct_inf* directs, - const int num_directs, - const t_clb_to_clb_directs* clb_to_clb_directs, - bool is_global_graph, - const enum e_clock_modeling clock_modeling) { +static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, + const int num_seg_types, + const t_chan_details& chan_details_x, + const t_chan_details& chan_details_y, + const t_track_to_pin_lookup& track_to_pin_lookup, + const t_pin_to_track_lookup& opin_to_track_map, + const vtr::NdMatrix, 3>& switch_block_conn, + t_sb_connection_map* sb_conn_map, + const DeviceGrid& grid, + const int Fs, + t_sblock_pattern& sblock_pattern, + const std::vector>& Fc_out, + vtr::NdMatrix& Fc_xofs, + vtr::NdMatrix& Fc_yofs, + const t_rr_node_indices& L_rr_node_indices, + const int max_chan_width, + const t_chan_width& 
chan_width, + const int wire_to_ipin_switch, + const int delayless_switch, + const enum e_directionality directionality, + bool* Fc_clipped, + const t_direct_inf* directs, + const int num_directs, + const t_clb_to_clb_directs* clb_to_clb_directs, + bool is_global_graph, + const enum e_clock_modeling clock_modeling) { //We take special care when creating RR graph edges (there are typically many more //edges than nodes in an RR graph). // @@ -1294,17 +1297,25 @@ static void alloc_and_load_rr_graph(std::vector& L_rr_node, } } + std::function update_chan_width = [](t_chan_width*) { + }; if (clock_modeling == DEDICATED_NETWORK) { - ClockRRGraphBuilder::create_and_append_clock_rr_graph( - L_rr_node, + ClockRRGraphBuilder builder( + chan_width, grid, &L_rr_node); + builder.create_and_append_clock_rr_graph( num_seg_types, - rr_edges_to_create); + &rr_edges_to_create); uniquify_edges(rr_edges_to_create); alloc_and_load_edges(L_rr_node, rr_edges_to_create); rr_edges_to_create.clear(); + update_chan_width = [builder](t_chan_width* chan_width) { + builder.update_chan_width(chan_width); + }; } init_fan_in(L_rr_node, L_rr_node.size()); + + return update_chan_width; } static void build_bidir_rr_opins(const int i, diff --git a/vpr/src/route/rr_graph_clock.cpp b/vpr/src/route/rr_graph_clock.cpp index fd1ec74bbbe..35b827de7a0 100644 --- a/vpr/src/route/rr_graph_clock.cpp +++ b/vpr/src/route/rr_graph_clock.cpp @@ -12,27 +12,25 @@ #include "vtr_time.h" #include "vpr_error.h" -void ClockRRGraphBuilder::create_and_append_clock_rr_graph( - std::vector& L_rr_node, - int num_seg_types, - t_rr_edge_info_set& rr_edges_to_create) { +void ClockRRGraphBuilder::create_and_append_clock_rr_graph(int num_seg_types, + t_rr_edge_info_set* rr_edges_to_create) { vtr::ScopedStartFinishTimer timer("Build clock network routing resource graph"); const auto& device_ctx = g_vpr_ctx.device(); auto& clock_networks = device_ctx.clock_networks; auto& clock_routing = device_ctx.clock_connections; - 
ClockRRGraphBuilder clock_graph = ClockRRGraphBuilder(&L_rr_node, &rr_edges_to_create); - clock_graph.create_clock_networks_wires(clock_networks, num_seg_types); - clock_graph.create_clock_networks_switches(clock_routing); + create_clock_networks_wires(clock_networks, num_seg_types, rr_edges_to_create); + create_clock_networks_switches(clock_routing, rr_edges_to_create); } // Clock network information comes from the arch file void ClockRRGraphBuilder::create_clock_networks_wires(const std::vector>& clock_networks, - int num_segments) { + int num_segments, + t_rr_edge_info_set* rr_edges_to_create) { // Add rr_nodes for each clock network wire for (auto& clock_network : clock_networks) { - clock_network->create_rr_nodes_for_clock_network_wires(*this, rr_nodes_, rr_edges_to_create_, num_segments); + clock_network->create_rr_nodes_for_clock_network_wires(*this, rr_nodes_, rr_edges_to_create, num_segments); } // Reduce the capacity of rr_nodes for performance @@ -40,9 +38,10 @@ void ClockRRGraphBuilder::create_clock_networks_wires(const std::vector>& clock_connections) { +void ClockRRGraphBuilder::create_clock_networks_switches(const std::vector>& clock_connections, + t_rr_edge_info_set* rr_edges_to_create) { for (auto& clock_connection : clock_connections) { - clock_connection->create_switches(*this, rr_edges_to_create_); + clock_connection->create_switches(*this, rr_edges_to_create); } } @@ -137,51 +136,42 @@ std::set> SwitchPoint::get_switch_locations() const { } int ClockRRGraphBuilder::get_and_increment_chanx_ptc_num() { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& grid = device_ctx.grid; - auto* channel_width = &device_ctx.chan_width; - // ptc_num is determined by the channel width // The channel width lets the drawing engine how much to space the LBs appart - int ptc_num = channel_width->x_max++; - if (channel_width->x_max > channel_width->max) { - channel_width->max = channel_width->x_max; - } - - for (size_t i = 0; i < grid.height(); ++i) { - 
device_ctx.chan_width.x_list[i]++; - } - + int ptc_num = chan_width_.x_max + (chanx_ptc_idx_++); return ptc_num; } int ClockRRGraphBuilder::get_and_increment_chany_ptc_num() { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& grid = device_ctx.grid; - auto* channel_width = &device_ctx.chan_width; - // ptc_num is determined by the channel width // The channel width lets the drawing engine how much to space the LBs appart - int ptc_num = channel_width->y_max++; - if (channel_width->y_max > channel_width->max) { - channel_width->max = channel_width->y_max; - } + int ptc_num = chan_width_.y_max + (chany_ptc_idx_++); + return ptc_num; +} - for (size_t i = 0; i < grid.width(); ++i) { - device_ctx.chan_width.y_list[i]++; - } +void ClockRRGraphBuilder::update_chan_width(t_chan_width* chan_width) const { + chan_width->x_max += chanx_ptc_idx_; + chan_width->y_max += chany_ptc_idx_; + chan_width->max = std::max(chan_width->max, chan_width->x_max); + chan_width->max = std::max(chan_width->max, chan_width->y_max); - return ptc_num; + for (size_t i = 0; i < grid_.height(); ++i) { + chan_width->x_list[i] += chanx_ptc_idx_; + } + for (size_t i = 0; i < grid_.width(); ++i) { + chan_width->y_list[i] += chany_ptc_idx_; + } } -size_t ClockRRGraphBuilder::estimate_additional_nodes() { +size_t ClockRRGraphBuilder::estimate_additional_nodes(const DeviceGrid& grid) { size_t num_additional_nodes = 0; const auto& device_ctx = g_vpr_ctx.device(); auto& clock_networks = device_ctx.clock_networks; auto& clock_routing = device_ctx.clock_connections; + for (auto& clock_network : clock_networks) { - num_additional_nodes += clock_network->estimate_additional_nodes(); + num_additional_nodes += clock_network->estimate_additional_nodes(grid); } for (auto& clock_connection : clock_routing) { num_additional_nodes += clock_connection->estimate_additional_nodes(); diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 1e80a2e2b43..0d282fde216 100644 --- 
a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -74,10 +74,18 @@ class ClockRRGraphBuilder { public: ClockRRGraphBuilder( - std::vector* rr_nodes, - t_rr_edge_info_set* rr_edges_to_create) - : rr_nodes_(rr_nodes) - , rr_edges_to_create_(rr_edges_to_create) { + const t_chan_width& chan_width, + const DeviceGrid& grid, + std::vector* rr_nodes) + : chan_width_(chan_width) + , grid_(grid) + , rr_nodes_(rr_nodes) + , chanx_ptc_idx_(0) + , chany_ptc_idx_(0) { + } + + const DeviceGrid& grid() const { + return grid_; } /* Saves a map from switch rr_node idx -> {x, y} location */ @@ -97,26 +105,32 @@ class ClockRRGraphBuilder { std::set> get_switch_locations(std::string clock_name, std::string switch_point_name) const; - static size_t estimate_additional_nodes(); + void update_chan_width(t_chan_width* chan_width) const; + + static size_t estimate_additional_nodes(const DeviceGrid& grid); public: /* Creates the routing resourse (rr) graph of the clock network and appends it to the * existing rr graph created in build_rr_graph for inter-block and intra-block routing. 
*/ - static void create_and_append_clock_rr_graph( - std::vector& L_rr_node, - int num_seg_types, - t_rr_edge_info_set& rr_edges_to_create); + void create_and_append_clock_rr_graph(int num_seg_types, + t_rr_edge_info_set* rr_edges_to_create); private: /* loop over all of the clock networks and create their wires */ void create_clock_networks_wires(const std::vector>& clock_networks, - int num_segments); + int num_segments, + t_rr_edge_info_set* rr_edges_to_create); /* loop over all clock routing connections and create the switches and connections */ - void create_clock_networks_switches(const std::vector>& clock_connections); + void create_clock_networks_switches(const std::vector>& clock_connections, + t_rr_edge_info_set* rr_edges_to_create); + const t_chan_width& chan_width_; + const DeviceGrid& grid_; std::vector* rr_nodes_; - t_rr_edge_info_set* rr_edges_to_create_; + + int chanx_ptc_idx_; + int chany_ptc_idx_; }; #endif From ab5c4ba87bc0ab304fade9494f6642404e5a460f Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 08:26:03 -0800 Subject: [PATCH 3/7] Fix some bugs found during check rr graph. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/check_rr_graph.cpp | 5 +++++ vpr/src/route/clock_connection_builders.cpp | 22 ++++++++++----------- vpr/src/route/clock_connection_builders.h | 12 +++++------ vpr/src/route/clock_network_builders.cpp | 8 ++++---- vpr/src/route/rr_graph.cpp | 5 ++++- vpr/src/route/rr_graph_clock.cpp | 15 ++++++++++++++ vpr/src/route/rr_graph_clock.h | 5 +++++ 7 files changed, 50 insertions(+), 22 deletions(-) diff --git a/vpr/src/route/check_rr_graph.cpp b/vpr/src/route/check_rr_graph.cpp index 66609917624..1495ec21da2 100644 --- a/vpr/src/route/check_rr_graph.cpp +++ b/vpr/src/route/check_rr_graph.cpp @@ -241,6 +241,11 @@ void check_rr_node(int inode, enum e_route_type route_type, const DeviceContext& cost_index = device_ctx.rr_nodes[inode].cost_index(); type = nullptr; + // Virtual clock network sink is special, ignore. + if (device_ctx.virtual_clock_network_root_idx == inode) { + return; + } + const auto& grid = device_ctx.grid; if (xlow > xhigh || ylow > yhigh) { VPR_ERROR(VPR_ERROR_ROUTE, diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index 7345cf24c01..be94e6b06bb 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -27,8 +27,8 @@ void RoutingToClockConnection::set_switch_location(int x, int y) { switch_location.y = y; } -void RoutingToClockConnection::set_switch(int rr_switch_index) { - rr_switch_idx = rr_switch_index; +void RoutingToClockConnection::set_switch(int arch_switch_index) { + arch_switch_idx = arch_switch_index; } void RoutingToClockConnection::set_fc_val(float fc_val) { @@ -74,18 +74,18 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ // Connect to x-channel wires unsigned num_wires_x = x_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_x; i++) { - rr_edges_to_create->emplace_back(x_wire_indices[i], clock_index, 
rr_switch_idx); + clock_graph.add_edge(rr_edges_to_create, x_wire_indices[i], clock_index, arch_switch_idx); } // Connect to y-channel wires unsigned num_wires_y = y_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_y; i++) { - rr_edges_to_create->emplace_back(y_wire_indices[i], clock_index, rr_switch_idx); + clock_graph.add_edge(rr_edges_to_create, y_wire_indices[i], clock_index, arch_switch_idx); } // Connect to virtual clock sink node // used by the two stage router - rr_edges_to_create->emplace_back(clock_index, virtual_clock_network_root_idx, rr_switch_idx); + clock_graph.add_edge(rr_edges_to_create, clock_index, virtual_clock_network_root_idx, arch_switch_idx); } } @@ -128,8 +128,8 @@ void ClockToClockConneciton::set_to_clock_switch_point_name(std::string switch_p to_switch = switch_point_name; } -void ClockToClockConneciton::set_switch(int rr_switch_index) { - rr_switch_idx = rr_switch_index; +void ClockToClockConneciton::set_switch(int arch_switch_index) { + arch_switch_idx = arch_switch_index; } void ClockToClockConneciton::set_fc_val(float fc_val) { @@ -187,7 +187,7 @@ void ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_gr if (from_itter == from_rr_node_indices.end()) { from_itter = from_rr_node_indices.begin(); } - rr_edges_to_create->emplace_back(*from_itter, to_index, rr_switch_idx); + clock_graph.add_edge(rr_edges_to_create, *from_itter, to_index, arch_switch_idx); from_itter++; } } @@ -207,8 +207,8 @@ void ClockToPinsConnection::set_clock_switch_point_name( switch_point_name = connection_switch_point_name; } -void ClockToPinsConnection::set_switch(int rr_switch_index) { - rr_switch_idx = rr_switch_index; +void ClockToPinsConnection::set_switch(int arch_switch_index) { + arch_switch_idx = arch_switch_index; } void ClockToPinsConnection::set_fc_val(float fc_val) { @@ -301,7 +301,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra //Create edges depending on Fc for (size_t i = 0; i < 
clock_network_indices.size() * fc; i++) { - rr_edges_to_create->emplace_back(clock_network_indices[i], clock_pin_node_idx, rr_switch_idx); + clock_graph.add_edge(rr_edges_to_create, clock_network_indices[i], clock_pin_node_idx, arch_switch_idx); } } } diff --git a/vpr/src/route/clock_connection_builders.h b/vpr/src/route/clock_connection_builders.h index f040e1c8062..110ecb24d7c 100644 --- a/vpr/src/route/clock_connection_builders.h +++ b/vpr/src/route/clock_connection_builders.h @@ -36,7 +36,7 @@ class RoutingToClockConnection : public ClockConnection { std::string clock_to_connect_to; std::string switch_point_name; Coordinates switch_location; - int rr_switch_idx; + int arch_switch_idx; float fc; int seed = 101; @@ -48,7 +48,7 @@ class RoutingToClockConnection : public ClockConnection { void set_clock_name_to_connect_to(std::string clock_name); void set_clock_switch_point_name(std::string clock_switch_point_name); void set_switch_location(int x, int y); - void set_switch(int rr_switch_index); + void set_switch(int arch_switch_index); void set_fc_val(float fc_val); /* @@ -66,7 +66,7 @@ class ClockToClockConneciton : public ClockConnection { std::string from_switch; std::string to_clock; std::string to_switch; - int rr_switch_idx; + int arch_switch_idx; float fc; public: @@ -77,7 +77,7 @@ class ClockToClockConneciton : public ClockConnection { void set_from_clock_switch_point_name(std::string switch_point_name); void set_to_clock_name(std::string clock_name); void set_to_clock_switch_point_name(std::string switch_point_name); - void set_switch(int rr_switch_index); + void set_switch(int arch_switch_index); void set_fc_val(float fc_val); /* @@ -94,7 +94,7 @@ class ClockToPinsConnection : public ClockConnection { private: std::string clock_to_connect_from; std::string switch_point_name; - int rr_switch_idx; + int arch_switch_idx; float fc; public: @@ -103,7 +103,7 @@ class ClockToPinsConnection : public ClockConnection { */ void 
set_clock_name_to_connect_from(std::string clock_name); void set_clock_switch_point_name(std::string connection_switch_point_name); - void set_switch(int rr_switch_index); + void set_switch(int arch_switch_index); void set_fc_val(float fc_val); /* diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 174b3990ac3..1f1f5cc06a6 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -296,8 +296,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB clock_graph); // connect drive point to each half rib using a directed switch - rr_edges_to_create->emplace_back(drive_node_idx, left_node_idx, drive.switch_idx); - rr_edges_to_create->emplace_back(drive_node_idx, right_node_idx, drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, drive_node_idx, left_node_idx, drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, drive_node_idx, right_node_idx, drive.switch_idx); } } } @@ -585,8 +585,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap clock_graph); // connect drive point to each half spine using a directed switch - rr_edges_to_create->emplace_back(drive_node_idx, left_node_idx, drive.switch_idx); - rr_edges_to_create->emplace_back(drive_node_idx, right_node_idx, drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, drive_node_idx, left_node_idx, drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, drive_node_idx, right_node_idx, drive.switch_idx); } } } diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 8b51ad486ef..946e4c98908 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -694,7 +694,10 @@ static void build_rr_graph(const t_graph_type graph_type, /* Update rr_nodes capacities if global routing */ if (graph_type == GRAPH_GLOBAL) { - for (size_t i = 0; i < device_ctx.rr_nodes.size(); i++) { + // Using num_rr_nodes here over 
device_ctx.rr_nodes.size() because + // clock_modeling::DEDICATED_NETWORK will append some rr nodes after + // the regular graph. + for (size_t i = 0; i < num_rr_nodes; i++) { if (device_ctx.rr_nodes[i].type() == CHANX) { int ylow = device_ctx.rr_nodes[i].ylow(); device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.x_list[ylow]); diff --git a/vpr/src/route/rr_graph_clock.cpp b/vpr/src/route/rr_graph_clock.cpp index 35b827de7a0..4dbb93a070f 100644 --- a/vpr/src/route/rr_graph_clock.cpp +++ b/vpr/src/route/rr_graph_clock.cpp @@ -179,3 +179,18 @@ size_t ClockRRGraphBuilder::estimate_additional_nodes(const DeviceGrid& grid) { return num_additional_nodes; } + +void ClockRRGraphBuilder::add_edge(t_rr_edge_info_set* rr_edges_to_create, + int src_node, + int sink_node, + int arch_switch_idx) const { + const auto& device_ctx = g_vpr_ctx.device(); + VTR_ASSERT(arch_switch_idx < device_ctx.num_arch_switches); + rr_edges_to_create->emplace_back(src_node, sink_node, arch_switch_idx); + + const auto& sw = device_ctx.arch_switch_inf[arch_switch_idx]; + if (!sw.buffered() && !sw.configurable()) { + // This is short, create a reverse edge. + rr_edges_to_create->emplace_back(sink_node, src_node, arch_switch_idx); + } +} diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 0d282fde216..3ee3a18ee00 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -109,6 +109,11 @@ class ClockRRGraphBuilder { static size_t estimate_additional_nodes(const DeviceGrid& grid); + void add_edge(t_rr_edge_info_set* rr_edges_to_create, + int src_node, + int sink_node, + int arch_switch_idx) const; + public: /* Creates the routing resourse (rr) graph of the clock network and appends it to the * existing rr graph created in build_rr_graph for inter-block and intra-block routing. 
*/ From befbff5207fa3322fb3f5d0326efa47e590a05d7 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 08:42:18 -0800 Subject: [PATCH 4/7] Fix some compiler warnings. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 946e4c98908..d2f248718e4 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -697,7 +697,7 @@ static void build_rr_graph(const t_graph_type graph_type, // Using num_rr_nodes here over device_ctx.rr_nodes.size() because // clock_modeling::DEDICATED_NETWORK will append some rr nodes after // the regular graph. - for (size_t i = 0; i < num_rr_nodes; i++) { + for (int i = 0; i < num_rr_nodes; i++) { if (device_ctx.rr_nodes[i].type() == CHANX) { int ylow = device_ctx.rr_nodes[i].ylow(); device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.x_list[ylow]); @@ -1311,8 +1311,8 @@ static std::function alloc_and_load_rr_graph(std::vector Date: Fri, 24 Jan 2020 10:37:13 -0800 Subject: [PATCH 5/7] Add verification that no incremental node allocation occurred. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/rr_graph.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d2f248718e4..d5cf2ac8e41 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -574,8 +574,10 @@ static void build_rr_graph(const t_graph_type graph_type, device_ctx.rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, grid, &num_rr_nodes, chan_details_x, chan_details_y); + size_t expected_node_count = num_rr_nodes; if (clock_modeling == DEDICATED_NETWORK) { - device_ctx.rr_nodes.reserve(num_rr_nodes + ClockRRGraphBuilder::estimate_additional_nodes(grid)); + expected_node_count += ClockRRGraphBuilder::estimate_additional_nodes(grid); + device_ctx.rr_nodes.reserve(expected_node_count); } device_ctx.rr_nodes.resize(num_rr_nodes); @@ -692,6 +694,12 @@ static void build_rr_graph(const t_graph_type graph_type, is_global_graph, clock_modeling); + // Verify no incremental node allocation. + if (device_ctx.rr_nodes.size() > expected_node_count) { + VTR_LOG_ERROR("Expected no more than %zu nodes, have %zu nodes", + expected_node_count, device_ctx.rr_nodes.size()); + } + /* Update rr_nodes capacities if global routing */ if (graph_type == GRAPH_GLOBAL) { // Using num_rr_nodes here over device_ctx.rr_nodes.size() because From d7a9c0a5ca5aa401bef263e8c72deb8afd004d3d Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 11:47:01 -0800 Subject: [PATCH 6/7] Move rr node storage behind an object. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/vpr_context.h | 3 ++- vpr/src/route/clock_network_builders.cpp | 12 ++++----- vpr/src/route/clock_network_builders.h | 15 ++++++----- vpr/src/route/rr_graph.cpp | 34 ++++++++++++------------ vpr/src/route/rr_graph.h | 4 ++- vpr/src/route/rr_graph_clock.h | 5 ++-- 6 files changed, 39 insertions(+), 34 deletions(-) diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index b6a1f9859da..fb0ddb75511 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -9,6 +9,7 @@ #include "vtr_vector.h" #include "atom_netlist.h" #include "clustered_netlist.h" +#include "rr_node_storage.h" #include "rr_node.h" #include "tatum/TimingGraph.hpp" #include "tatum/TimingConstraints.hpp" @@ -144,7 +145,7 @@ struct DeviceContext : public Context { t_chan_width chan_width; /* Structures to define the routing architecture of the FPGA. */ - std::vector rr_nodes; /* autogenerated in build_rr_graph */ + t_rr_node_storage rr_nodes; /* autogenerated in build_rr_graph */ std::vector rr_indexed_data; /* [0 .. 
num_rr_indexed_data-1] */ diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 1f1f5cc06a6..2bc425fa37d 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -61,7 +61,7 @@ void ClockNetwork::set_num_instance(int num_inst) { */ void ClockNetwork::create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { for (int inst_num = 0; inst_num < get_num_inst(); inst_num++) { @@ -215,7 +215,7 @@ size_t ClockRib::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { // Only chany wires need to know the number of segments inorder @@ -307,7 +307,7 @@ int ClockRib::create_chanx_wire(int x_start, int y, int ptc_num, e_direction direction, - std::vector* rr_nodes) { + t_rr_node_storage* rr_nodes) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; auto& node = rr_nodes->back(); @@ -502,7 +502,7 @@ size_t ClockSpine::estimate_additional_nodes(const DeviceGrid& grid) { } void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { auto& grid = clock_graph.grid(); @@ -596,7 +596,7 @@ int ClockSpine::create_chany_wire(int y_start, int x, int ptc_num, e_direction direction, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, int num_segments) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; @@ -662,7 +662,7 @@ size_t ClockHTree::estimate_additional_nodes(const DeviceGrid& /*grid*/) { } void 
ClockHTree::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) { //Remove unused parameter warning diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index c4caa039b2e..27d720d3f92 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -11,6 +11,7 @@ #include "rr_graph2.h" #include "rr_graph_clock.h" +class t_rr_node_storage; class ClockRRGraphBuilder; enum class ClockType { @@ -102,13 +103,13 @@ class ClockNetwork { /* Creates the RR nodes for the clock network wires and adds them to the reverse lookup * in ClockRRGraphBuilder. The reverse lookup maps the nodes to their switch point locations */ void create_rr_nodes_for_clock_network_wires(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments); virtual void create_segments(std::vector& segment_inf) = 0; virtual void create_rr_nodes_and_internal_edges_for_one_instance( ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) = 0; @@ -163,7 +164,7 @@ class ClockRib : public ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -172,7 +173,7 @@ class ClockRib : public ClockNetwork { int y, int ptc_num, e_direction direction, - std::vector* rr_nodes); + t_rr_node_storage* rr_nodes); void record_tap_locations(unsigned x_start, unsigned x_end, unsigned y, @@ -222,7 +223,7 @@ class ClockSpine : public 
ClockNetwork { */ void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; @@ -231,7 +232,7 @@ class ClockSpine : public ClockNetwork { int x, int ptc_num, e_direction direction, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, int num_segments); void record_tap_locations(unsigned y_start, unsigned y_end, @@ -257,7 +258,7 @@ class ClockHTree : private ClockNetwork { // TODO: Unimplemented member function void create_segments(std::vector& segment_inf) override; void create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphBuilder& clock_graph, - std::vector* rr_nodes, + t_rr_node_storage* rr_nodes, t_rr_edge_info_set* rr_edges_to_create, int num_segments) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d5cf2ac8e41..604b83d7fc9 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -105,7 +105,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& created_rr_edges, @@ -130,7 +130,7 @@ static void build_unidir_rr_opins(const int i, t_rr_edge_info_set& created_rr_edges, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, @@ -143,12 +143,12 @@ static int get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const 
t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs); -static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -209,7 +209,7 @@ static std::vector> alloc_and_load_perturb_ipins(const int L_n static void build_rr_sinks_sources(const int i, const int j, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -231,13 +231,13 @@ static void build_rr_chan(const int i, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& created_rr_edges, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality); void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create); -void alloc_and_load_edges(std::vector& L_rr_node, +void alloc_and_load_edges(t_rr_node_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create); static void alloc_and_load_rr_switch_inf(const int num_arch_switches, @@ -275,7 +275,7 @@ static std::vector> alloc_and_load_actual_fc(const std::vector< const enum e_directionality directionality, bool* Fc_clipped); -static int pick_best_direct_connect_target_rr_node(const std::vector& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes); @@ -1185,7 +1185,7 @@ static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, /* Does the actual work of allocating the rr_graph and filling all the * * appropriate values. Everything up to this was just a prelude! 
*/ -static std::function alloc_and_load_rr_graph(std::vector& L_rr_node, +static std::function alloc_and_load_rr_graph(t_rr_node_storage& L_rr_node, const int num_seg_types, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, @@ -1333,7 +1333,7 @@ static void build_bidir_rr_opins(const int i, const int j, const e_side side, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_pin_to_track_lookup& opin_to_track_map, const std::vector>& Fc_out, t_rr_edge_info_set& rr_edges_to_create, @@ -1426,7 +1426,7 @@ void free_rr_graph() { static void build_rr_sinks_sources(const int i, const int j, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, const int delayless_switch, @@ -1555,7 +1555,7 @@ static void build_rr_sinks_sources(const int i, //Create the actual edges } -void init_fan_in(std::vector& L_rr_node, const int num_rr_nodes) { +void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes) { //Loads fan-ins for all nodes //Reset all fan-ins to zero @@ -1591,7 +1591,7 @@ static void build_rr_chan(const int x_coord, const t_chan_details& chan_details_y, const t_rr_node_indices& L_rr_node_indices, t_rr_edge_info_set& rr_edges_to_create, - std::vector& L_rr_node, + t_rr_node_storage& L_rr_node, const int wire_to_ipin_switch, const enum e_directionality directionality) { /* this function builds both x and y-directed channel segments, so set up our @@ -1754,7 +1754,7 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) { rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end()); } -void alloc_and_load_edges(std::vector& L_rr_node, +void alloc_and_load_edges(t_rr_node_storage& L_rr_node, const t_rr_edge_info_set& rr_edges_to_create) { /* Sets up all the edge related information for rr_node */ @@ -2590,7 +2590,7 @@ 
std::string describe_rr_node(int inode) { return msg; } -static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const std::vector& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { +static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix& Fc_xofs, vtr::NdMatrix& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) { /* * This routine adds the edges from opins to channels at the specified * grid location (i,j) and grid tile side @@ -2825,7 +2825,7 @@ static int get_opin_direct_connecions(int x, int from_rr_node, t_rr_edge_info_set& rr_edges_to_create, const t_rr_node_indices& L_rr_node_indices, - const std::vector& rr_nodes, + const t_rr_node_storage& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs) { @@ -3037,7 +3037,7 @@ static std::vector alloc_and_load_perturb_opins(const t_physical_tile_type return perturb_opins; } -static int pick_best_direct_connect_target_rr_node(const std::vector& rr_nodes, +static int pick_best_direct_connect_target_rr_node(const t_rr_node_storage& rr_nodes, int from_rr, const std::vector& candidate_rr_nodes) { //With physically equivalent pins there may be multiple 
candidate rr nodes (which are equivalent) diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index f55a64f7f9f..527b06a4a19 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -45,7 +45,9 @@ void free_rr_graph(); //Returns a brief one-line summary of an RR node std::string describe_rr_node(int inode); -void init_fan_in(std::vector& L_rr_node, const int num_rr_nodes); +class t_rr_node_storage; + +void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes); // Sets the spec for the rr_switch based on the arch switch void load_rr_switch_from_arch_switch(int arch_switch_idx, diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 3ee3a18ee00..162ca58e6c8 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -15,6 +15,7 @@ class ClockNetwork; class ClockConnection; +class t_rr_node_storage; class SwitchPoint { /* A switch point object: keeps information on the location and and rr_node indices @@ -76,7 +77,7 @@ class ClockRRGraphBuilder { ClockRRGraphBuilder( const t_chan_width& chan_width, const DeviceGrid& grid, - std::vector* rr_nodes) + t_rr_node_storage* rr_nodes) : chan_width_(chan_width) , grid_(grid) , rr_nodes_(rr_nodes) @@ -132,7 +133,7 @@ class ClockRRGraphBuilder { const t_chan_width& chan_width_; const DeviceGrid& grid_; - std::vector* rr_nodes_; + t_rr_node_storage* rr_nodes_; int chanx_ptc_idx_; int chany_ptc_idx_; From 3c19d3d768144d0e89001a8f861eadfab0cbd9f5 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 24 Jan 2020 14:12:51 -0800 Subject: [PATCH 7/7] Convert t_rr_node to a fly-weight object. This should have a negligible performance impact, but this enables future changes to modify how rr nodes and rr edges are stored.
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/read_route.cpp | 2 +- vpr/src/draw/draw.cpp | 14 +- vpr/src/draw/draw.h | 2 +- vpr/src/power/power.cpp | 94 +++++------ vpr/src/route/clock_network_builders.cpp | 4 +- vpr/src/route/rr_graph.cpp | 2 +- vpr/src/route/rr_graph_reader.cpp | 10 +- vpr/src/route/rr_node.cpp | 190 +++++++++-------------- vpr/src/route/rr_node.h | 107 ++++--------- vpr/src/route/rr_node_fwd.h | 4 +- vpr/src/route/rr_node_impl.h | 163 +++++++++++++++++++ vpr/src/route/rr_node_storage.h | 133 ++++++++++++++++ 12 files changed, 459 insertions(+), 266 deletions(-) create mode 100644 vpr/src/route/rr_node_impl.h create mode 100644 vpr/src/route/rr_node_storage.h diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 9ec4069fe2c..31408f21223 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -231,7 +231,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file } else if (tokens[0] == "Node:") { /*An actual line, go through each node and add it to the route tree*/ inode = atoi(tokens[1].c_str()); - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; /*First node needs to be source. It is isolated to correctly set heap head.*/ if (node_count == 0 && tokens[2] != "SOURCE") { diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index 505928cf667..07603ed8398 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -2042,10 +2042,10 @@ static void draw_rr_pin(int inode, const ezgl::color& color, ezgl::renderer* g) * the physical pin is on. 
*/ void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen) { auto& device_ctx = g_vpr_ctx.device(); - draw_get_rr_pin_coords(&device_ctx.rr_nodes[inode], xcen, ycen); + draw_get_rr_pin_coords(device_ctx.rr_nodes[inode], xcen, ycen); } -void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { +void draw_get_rr_pin_coords(const t_rr_node node, float* xcen, float* ycen) { t_draw_coords* draw_coords = get_draw_coords_vars(); int i, j, k, ipin, pins_per_sub_tile; @@ -2053,13 +2053,13 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { t_physical_tile_type_ptr type; auto& device_ctx = g_vpr_ctx.device(); - i = node->xlow(); - j = node->ylow(); + i = node.xlow(); + j = node.ylow(); xc = draw_coords->tile_x[i]; yc = draw_coords->tile_y[j]; - ipin = node->ptc_num(); + ipin = node.ptc_num(); type = device_ctx.grid[i][j].type; pins_per_sub_tile = type->num_pins / type->capacity; k = ipin / pins_per_sub_tile; @@ -2071,7 +2071,7 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { step = (float)(draw_coords->get_tile_width()) / (float)(type->num_pins + type->capacity); offset = (ipin + k + 1) * step; - switch (node->side()) { + switch (node.side()) { case LEFT: yc += offset; break; @@ -2092,7 +2092,7 @@ void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen) { default: vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__, - "in draw_get_rr_pin_coords: Unexpected side %s.\n", node->side_string()); + "in draw_get_rr_pin_coords: Unexpected side %s.\n", node.side_string()); break; } diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index acc9d214572..e2434a4a23a 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -32,7 +32,7 @@ void free_draw_structs(); #ifndef NO_GRAPHICS void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen); -void draw_get_rr_pin_coords(const t_rr_node* node, float* xcen, float* ycen); +void draw_get_rr_pin_coords(const t_rr_node node, 
float* xcen, float* ycen); void draw_triangle_along_line(ezgl::renderer* g, ezgl::point2d start, ezgl::point2d end, float relative_position = 1., float arrow_size = DEFAULT_ARROW_SIZE); void draw_triangle_along_line(ezgl::renderer* g, ezgl::point2d loc, ezgl::point2d start, ezgl::point2d end, float arrow_size = DEFAULT_ARROW_SIZE); diff --git a/vpr/src/power/power.cpp b/vpr/src/power/power.cpp index d4e17c0f852..1a2587c640e 100644 --- a/vpr/src/power/power.cpp +++ b/vpr/src/power/power.cpp @@ -815,19 +815,19 @@ static void power_usage_routing(t_power_usage* power_usage, t_trace* trace; for (trace = route_ctx.trace[net_id].head; trace != nullptr; trace = trace->next) { - auto node = &device_ctx.rr_nodes[trace->index]; + auto node = device_ctx.rr_nodes[trace->index]; t_rr_node_power* node_power = &rr_node_power[trace->index]; if (node_power->visited) { continue; } - for (t_edge_size edge_idx = 0; edge_idx < node->num_edges(); edge_idx++) { - if (node->edge_sink_node(edge_idx) != OPEN) { - auto next_node = &device_ctx.rr_nodes[node->edge_sink_node(edge_idx)]; - t_rr_node_power* next_node_power = &rr_node_power[node->edge_sink_node(edge_idx)]; + for (t_edge_size edge_idx = 0; edge_idx < node.num_edges(); edge_idx++) { + if (node.edge_sink_node(edge_idx) != OPEN) { + auto next_node = device_ctx.rr_nodes[node.edge_sink_node(edge_idx)]; + t_rr_node_power* next_node_power = &rr_node_power[node.edge_sink_node(edge_idx)]; - switch (next_node->type()) { + switch (next_node.type()) { case CHANX: case CHANY: case IPIN: @@ -837,9 +837,9 @@ static void power_usage_routing(t_power_usage* power_usage, next_node_power->in_dens[next_node_power->num_inputs] = clb_net_density(node_power->net_num); next_node_power->in_prob[next_node_power->num_inputs] = clb_net_prob(node_power->net_num); next_node_power->num_inputs++; - if (next_node_power->num_inputs > next_node->fan_in()) { + if (next_node_power->num_inputs > next_node.fan_in()) { VTR_LOG("%d %d\n", next_node_power->num_inputs, - 
next_node->fan_in()); + next_node.fan_in()); fflush(nullptr); VTR_ASSERT(0); } @@ -857,7 +857,7 @@ static void power_usage_routing(t_power_usage* power_usage, /* Calculate power of all routing entities */ for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { t_power_usage sub_power_usage; - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; float C_wire; float buffer_size; @@ -866,7 +866,7 @@ static void power_usage_routing(t_power_usage* power_usage, //float C_per_seg_split; int wire_length; - switch (node->type()) { + switch (node.type()) { case SOURCE: case SINK: case OPIN: @@ -877,13 +877,13 @@ static void power_usage_routing(t_power_usage* power_usage, * - Driver (accounted for at end of CHANX/Y - see below) * - Multiplexor */ - if (node->fan_in()) { + if (node.fan_in()) { VTR_ASSERT(node_power->in_dens); VTR_ASSERT(node_power->in_prob); /* Multiplexor */ power_usage_mux_multilevel(&sub_power_usage, - power_get_mux_arch(node->fan_in(), + power_get_mux_arch(node.fan_in(), power_ctx.arch->mux_transistor_size), node_power->in_prob, node_power->in_dens, node_power->selected_input, true, @@ -904,19 +904,19 @@ static void power_usage_routing(t_power_usage* power_usage, VTR_ASSERT(node_power->in_prob); wire_length = 0; - if (node->type() == CHANX) { - wire_length = node->xhigh() - node->xlow() + 1; - } else if (node->type() == CHANY) { - wire_length = node->yhigh() - node->ylow() + 1; + if (node.type() == CHANX) { + wire_length = node.xhigh() - node.xlow() + 1; + } else if (node.type() == CHANY) { + wire_length = node.yhigh() - node.ylow() + 1; } C_wire = wire_length - * segment_inf[device_ctx.rr_indexed_data[node->cost_index()].seg_index].Cmetal; + * segment_inf[device_ctx.rr_indexed_data[node.cost_index()].seg_index].Cmetal; //(double)power_ctx.commonly_used->tile_length); - VTR_ASSERT(node_power->selected_input < node->fan_in()); 
+ VTR_ASSERT(node_power->selected_input < node.fan_in()); /* Multiplexor */ power_usage_mux_multilevel(&sub_power_usage, - power_get_mux_arch(node->fan_in(), + power_get_mux_arch(node.fan_in(), power_ctx.arch->mux_transistor_size), node_power->in_prob, node_power->in_dens, node_power->selected_input, true, power_ctx.solution_inf.T_crit); @@ -979,10 +979,10 @@ static void power_usage_routing(t_power_usage* power_usage, /* Determine types of switches that this wire drives */ connectionbox_fanout = 0; switchbox_fanout = 0; - for (t_edge_size iedge = 0; iedge < node->num_edges(); iedge++) { - if (node->edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { + for (t_edge_size iedge = 0; iedge < node.num_edges(); iedge++) { + if (node.edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { connectionbox_fanout++; - } else if (node->edge_switch(iedge) == routing_arch->delayless_switch) { + } else if (node.edge_switch(iedge) == routing_arch->delayless_switch) { /* Do nothing */ } else { switchbox_fanout++; @@ -1205,37 +1205,37 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { int fanout_to_IPIN = 0; int fanout_to_seg = 0; - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case IPIN: max_IPIN_fanin = std::max(max_IPIN_fanin, - static_cast(node->fan_in())); - max_fanin = std::max(max_fanin, static_cast(node->fan_in())); + static_cast(node.fan_in())); + max_fanin = std::max(max_fanin, static_cast(node.fan_in())); - node_power->in_dens = (float*)vtr::calloc(node->fan_in(), + node_power->in_dens = (float*)vtr::calloc(node.fan_in(), sizeof(float)); - node_power->in_prob = (float*)vtr::calloc(node->fan_in(), + node_power->in_prob = (float*)vtr::calloc(node.fan_in(), sizeof(float)); break; case CHANX: case 
CHANY: - for (t_edge_size iedge = 0; iedge < node->num_edges(); iedge++) { - if (node->edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { + for (t_edge_size iedge = 0; iedge < node.num_edges(); iedge++) { + if (node.edge_switch(iedge) == routing_arch->wire_to_rr_ipin_switch) { fanout_to_IPIN++; - } else if (node->edge_switch(iedge) != routing_arch->delayless_switch) { + } else if (node.edge_switch(iedge) != routing_arch->delayless_switch) { fanout_to_seg++; } } max_seg_to_IPIN_fanout = std::max(max_seg_to_IPIN_fanout, fanout_to_IPIN); max_seg_to_seg_fanout = std::max(max_seg_to_seg_fanout, fanout_to_seg); - max_fanin = std::max(max_fanin, static_cast(node->fan_in())); + max_fanin = std::max(max_fanin, static_cast(node.fan_in())); - node_power->in_dens = (float*)vtr::calloc(node->fan_in(), + node_power->in_dens = (float*)vtr::calloc(node.fan_in(), sizeof(float)); - node_power->in_prob = (float*)vtr::calloc(node->fan_in(), + node_power->in_prob = (float*)vtr::calloc(node.fan_in(), sizeof(float)); break; default: @@ -1254,14 +1254,14 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { /* Populate driver switch type */ for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; - for (t_edge_size edge_idx = 0; edge_idx < node->num_edges(); edge_idx++) { - if (node->edge_sink_node(edge_idx) != OPEN) { - if (rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type == OPEN) { - rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type = node->edge_switch(edge_idx); + for (t_edge_size edge_idx = 0; edge_idx < node.num_edges(); edge_idx++) { + if (node.edge_sink_node(edge_idx) != OPEN) { + if (rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type == OPEN) { + rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type = node.edge_switch(edge_idx); } else { - 
VTR_ASSERT(rr_node_power[node->edge_sink_node(edge_idx)].driver_switch_type == node->edge_switch(edge_idx)); + VTR_ASSERT(rr_node_power[node.edge_sink_node(edge_idx)].driver_switch_type == node.edge_switch(edge_idx)); } } } @@ -1270,13 +1270,13 @@ void power_routing_init(const t_det_routing_arch* routing_arch) { /* Find Max Fanout of Routing Buffer */ t_edge_size max_seg_fanout = 0; for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case CHANX: case CHANY: - if (node->num_edges() > max_seg_fanout) { - max_seg_fanout = node->num_edges(); + if (node.num_edges() > max_seg_fanout) { + max_seg_fanout = node.num_edges(); } break; default: @@ -1358,14 +1358,14 @@ bool power_uninit() { bool error = false; for (size_t rr_node_idx = 0; rr_node_idx < device_ctx.rr_nodes.size(); rr_node_idx++) { - auto node = &device_ctx.rr_nodes[rr_node_idx]; + auto node = device_ctx.rr_nodes[rr_node_idx]; t_rr_node_power* node_power = &rr_node_power[rr_node_idx]; - switch (node->type()) { + switch (node.type()) { case CHANX: case CHANY: case IPIN: - if (node->fan_in()) { + if (node.fan_in()) { free(node_power->in_dens); free(node_power->in_prob); } diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 2bc425fa37d..2af4509c013 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -310,7 +310,7 @@ int ClockRib::create_chanx_wire(int x_start, t_rr_node_storage* rr_nodes) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; - auto& node = rr_nodes->back(); + auto node = rr_nodes->back(); node.set_coordinates(x_start, y, x_end, y); node.set_type(CHANX); @@ -600,7 +600,7 @@ int ClockSpine::create_chany_wire(int y_start, int num_segments) { rr_nodes->emplace_back(); auto node_index = rr_nodes->size() - 1; 
- auto& node = rr_nodes->back(); + auto node = rr_nodes->back(); node.set_coordinates(x, y_start, x, y_end); node.set_type(CHANY); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 604b83d7fc9..2447a532dc1 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -931,7 +931,7 @@ static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin auto& device_ctx = g_vpr_ctx.mutable_device(); for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { - auto& from_node = device_ctx.rr_nodes[inode]; + auto from_node = device_ctx.rr_nodes[inode]; int num_edges = from_node.num_edges(); for (int iedge = 0; iedge < num_edges; iedge++) { const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)]; diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index e9380b12d7e..cc1f87e89c6 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -262,7 +262,7 @@ void process_seg_id(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { while (rr_node) { id = get_attribute(rr_node, "id", loc_data).as_int(); - auto& node = device_ctx.rr_nodes[id]; + auto node = device_ctx.rr_nodes[id]; segmentSubnode = get_single_child(rr_node, "segment", loc_data, pugiutil::OPTIONAL); if (segmentSubnode) { @@ -289,7 +289,7 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { while (rr_node) { int inode = get_attribute(rr_node, "id", loc_data).as_int(); - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; const char* node_type = get_attribute(rr_node, "type", loc_data).as_string(); if (strcmp(node_type, "CHANX") == 0) { @@ -726,7 +726,7 @@ void process_rr_node_indices(const DeviceGrid& grid) { * Note that CHANX and CHANY 's x and y are swapped due to the chan and seg convention. 
*/ for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; if (node.type() == SOURCE || node.type() == SINK) { for (int ix = node.xlow(); ix <= node.xhigh(); ix++) { for (int iy = node.ylow(); iy <= node.yhigh(); iy++) { @@ -788,7 +788,7 @@ void process_rr_node_indices(const DeviceGrid& grid) { int count; /* CHANX and CHANY need to reevaluated with its ptc num as the correct index*/ for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; if (node.type() == CHANX) { for (int iy = node.ylow(); iy <= node.yhigh(); iy++) { for (int ix = node.xlow(); ix <= node.xhigh(); ix++) { @@ -863,7 +863,7 @@ void set_cost_indices(pugi::xml_node parent, const pugiutil::loc_data& loc_data, while (rr_node) { int inode = get_attribute(rr_node, "id", loc_data).as_int(); - auto& node = device_ctx.rr_nodes[inode]; + auto node = device_ctx.rr_nodes[inode]; /*CHANX and CHANY cost index is dependent on the segment id*/ diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp index 97aa653d450..2dd2fa1fd50 100644 --- a/vpr/src/route/rr_node.cpp +++ b/vpr/src/route/rr_node.cpp @@ -1,4 +1,5 @@ #include "rr_node.h" +#include "rr_node_storage.h" #include "globals.h" #include "vpr_error.h" @@ -10,70 +11,6 @@ const char* t_rr_node::type_string() const { return rr_node_typename[type()]; } -short t_rr_node::xlow() const { - return xlow_; -} - -short t_rr_node::ylow() const { - return ylow_; -} - -short t_rr_node::xhigh() const { - return xhigh_; -} - -short t_rr_node::yhigh() const { - return yhigh_; -} - -short t_rr_node::ptc_num() const { - return ptc_.pin_num; //TODO eventually remove -} - -short t_rr_node::pin_num() const { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); - } - 
return ptc_.pin_num; -} - -short t_rr_node::track_num() const { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); - } - return ptc_.track_num; -} - -short t_rr_node::class_num() const { - if (type() != SOURCE && type() != SINK) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); - } - return ptc_.class_num; -} - -short t_rr_node::cost_index() const { - return cost_index_; -} - -short t_rr_node::rc_index() const { - return rc_index_; -} - -short t_rr_node::capacity() const { - return capacity_; -} - -t_edge_size t_rr_node::fan_in() const { - return fan_in_; -} - -e_direction t_rr_node::direction() const { - if (type() != CHANX && type() != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'direction' for non-channel type '%s'", type_string()); - } - return dir_side_.direction; -} - const char* t_rr_node::direction_string() const { if (direction() == INC_DIRECTION) { return "INC_DIR"; @@ -87,20 +24,14 @@ const char* t_rr_node::direction_string() const { return "NO_DIR"; } -e_side t_rr_node::side() const { - if (type() != IPIN && type() != OPIN) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'side' for non-IPIN/OPIN type '%s'", type_string()); - } - return dir_side_.side; -} - const char* t_rr_node::side_string() const { return SIDE_STRING[side()]; } //Returns the max 'length' over the x or y direction short t_rr_node::length() const { - return std::max(yhigh_ - ylow_, xhigh_ - xlow_); + const auto& node = storage_->get(id_); + return std::max(node.yhigh_ - node.ylow_, node.xhigh_ - node.xlow_); } bool t_rr_node::edge_is_configurable(t_edge_size iedge) const { @@ -124,8 +55,9 @@ float t_rr_node::C() const { bool t_rr_node::validate() const { //Check internal assumptions about RR node are valid + auto& node = storage_->get(id_); - if 
(num_edges_ > edges_capacity_) { + if (node.num_edges_ > node.edges_capacity_) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "RR Node number of edges exceeded edge capacity"); } @@ -151,7 +83,8 @@ bool t_rr_node::validate() const { } void t_rr_node::set_type(t_rr_type new_type) { - type_ = new_type; + auto& node = storage_->get(id_); + node.type_ = new_type; } /* @@ -159,163 +92,180 @@ void t_rr_node::set_type(t_rr_type new_type) { * They do not have to be in any particular order. */ void t_rr_node::set_coordinates(short x1, short y1, short x2, short y2) { + auto& node = storage_->get(id_); if (x1 < x2) { - xlow_ = x1; - xhigh_ = x2; + node.xlow_ = x1; + node.xhigh_ = x2; } else { - xlow_ = x2; - xhigh_ = x1; + node.xlow_ = x2; + node.xhigh_ = x1; } if (y1 < y2) { - ylow_ = y1; - yhigh_ = y2; + node.ylow_ = y1; + node.yhigh_ = y2; } else { - ylow_ = y2; - yhigh_ = y1; + node.ylow_ = y2; + node.yhigh_ = y1; } } void t_rr_node::set_ptc_num(short new_ptc_num) { - ptc_.pin_num = new_ptc_num; //TODO: eventually remove + auto& node = storage_->get(id_); + node.ptc_.pin_num = new_ptc_num; //TODO: eventually remove } void t_rr_node::set_pin_num(short new_pin_num) { if (type() != IPIN && type() != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); } - ptc_.pin_num = new_pin_num; + auto& node = storage_->get(id_); + node.ptc_.pin_num = new_pin_num; } void t_rr_node::set_track_num(short new_track_num) { if (type() != CHANX && type() != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); } - ptc_.track_num = new_track_num; + auto& node = storage_->get(id_); + node.ptc_.track_num = new_track_num; } void t_rr_node::set_class_num(short new_class_num) { if (type() != SOURCE && type() != SINK) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); } - ptc_.class_num = new_class_num; 
+ auto& node = storage_->get(id_); + node.ptc_.class_num = new_class_num; } void t_rr_node::set_cost_index(size_t new_cost_index) { - if (new_cost_index >= std::numeric_limits::max()) { + auto& node = storage_->get(id_); + if (new_cost_index >= std::numeric_limits::max()) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set cost_index_ %zu above cost_index storage max value.", new_cost_index); } - cost_index_ = new_cost_index; + node.cost_index_ = new_cost_index; } void t_rr_node::set_rc_index(short new_rc_index) { - rc_index_ = new_rc_index; + auto& node = storage_->get(id_); + node.rc_index_ = new_rc_index; } void t_rr_node::set_capacity(short new_capacity) { VTR_ASSERT(new_capacity >= 0); - capacity_ = new_capacity; + auto& node = storage_->get(id_); + node.capacity_ = new_capacity; } void t_rr_node::set_fan_in(t_edge_size new_fan_in) { - fan_in_ = new_fan_in; + auto& node = storage_->get(id_); + node.fan_in_ = new_fan_in; } t_edge_size t_rr_node::add_edge(int sink_node, int iswitch) { - if (edges_capacity_ == num_edges_) { - constexpr size_t MAX_EDGE_COUNT = std::numeric_limits::max(); - if (edges_capacity_ == MAX_EDGE_COUNT) { + auto& node = storage_->get(id_); + if (node.edges_capacity_ == node.num_edges_) { + constexpr size_t MAX_EDGE_COUNT = std::numeric_limits::max(); + if (node.edges_capacity_ == MAX_EDGE_COUNT) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Maximum RR Node out-edge count (%zu) exceeded", MAX_EDGE_COUNT); } //Grow - size_t new_edges_capacity = std::max(1, 2 * edges_capacity_); + size_t new_edges_capacity = std::max(1, 2 * node.edges_capacity_); new_edges_capacity = std::min(new_edges_capacity, MAX_EDGE_COUNT); //Clip to maximum count - auto new_edges = std::make_unique(new_edges_capacity); + auto new_edges = std::make_unique(new_edges_capacity); //Copy - std::copy_n(edges_.get(), num_edges_, new_edges.get()); + std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); //Replace - edges_ = std::move(new_edges); - edges_capacity_ = 
new_edges_capacity; + node.edges_ = std::move(new_edges); + node.edges_capacity_ = new_edges_capacity; } - VTR_ASSERT(num_edges_ < edges_capacity_); + VTR_ASSERT(node.num_edges_ < node.edges_capacity_); - edges_[num_edges_].sink_node = sink_node; - edges_[num_edges_].switch_id = iswitch; + node.edges_[node.num_edges_].sink_node = sink_node; + node.edges_[node.num_edges_].switch_id = iswitch; - ++num_edges_; + ++node.num_edges_; - return num_edges_; + return node.num_edges_; } void t_rr_node::shrink_to_fit() { //Shrink - auto new_edges = std::make_unique(num_edges_); + auto& node = storage_->get(id_); + auto new_edges = std::make_unique(node.num_edges_); //Copy - std::copy_n(edges_.get(), num_edges_, new_edges.get()); + std::copy_n(node.edges_.get(), node.num_edges_, new_edges.get()); //Replace - edges_ = std::move(new_edges); - edges_capacity_ = num_edges_; + node.edges_ = std::move(new_edges); + node.edges_capacity_ = node.num_edges_; } void t_rr_node::partition_edges() { auto& device_ctx = g_vpr_ctx.device(); - auto is_configurable = [&](const t_rr_edge& edge) { + auto is_configurable = [&](const t_rr_node_data::t_rr_edge& edge) { auto iswitch = edge.switch_id; return device_ctx.rr_switch_inf[iswitch].configurable(); }; //Partition the edges so the first set of edges are all configurable, and the later are not - auto first_non_config_edge = std::partition(edges_.get(), edges_.get() + num_edges_, is_configurable); + auto& node = storage_->get(id_); + auto first_non_config_edge = std::partition(node.edges_.get(), node.edges_.get() + node.num_edges_, is_configurable); - size_t num_conf_edges = std::distance(edges_.get(), first_non_config_edge); + size_t num_conf_edges = std::distance(node.edges_.get(), first_non_config_edge); size_t num_non_conf_edges = num_edges() - num_conf_edges; //Note we calculate using the size_t to get full range //Check that within allowable range (no overflow when stored as num_non_configurable_edges_ - if (num_non_conf_edges > 
std::numeric_limits::max()) { + if (num_non_conf_edges > std::numeric_limits::max()) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Exceeded RR node maximum number of non-configurable edges"); } - num_non_configurable_edges_ = num_non_conf_edges; //Narrowing + node.num_non_configurable_edges_ = num_non_conf_edges; //Narrowing } void t_rr_node::set_num_edges(size_t new_num_edges) { + auto& node = storage_->get(id_); VTR_ASSERT(new_num_edges <= std::numeric_limits::max()); - num_edges_ = new_num_edges; - edges_capacity_ = new_num_edges; + node.num_edges_ = new_num_edges; + node.edges_capacity_ = new_num_edges; - edges_ = std::make_unique(num_edges_); + node.edges_ = std::make_unique(node.num_edges_); } void t_rr_node::set_direction(e_direction new_direction) { if (type() != CHANX && type() != CHANY) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'direction' for non-channel type '%s'", type_string()); } - dir_side_.direction = new_direction; + auto& node = storage_->get(id_); + node.dir_side_.direction = new_direction; } void t_rr_node::set_side(e_side new_side) { if (type() != IPIN && type() != OPIN) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to set RR node 'side' for non-channel type '%s'", type_string()); } - dir_side_.side = new_side; + auto& node = storage_->get(id_); + node.dir_side_.side = new_side; } void t_rr_node::set_edge_sink_node(t_edge_size iedge, int sink_node) { + auto& node = storage_->get(id_); VTR_ASSERT(iedge < num_edges()); VTR_ASSERT(sink_node >= 0); - edges_[iedge].sink_node = sink_node; + node.edges_[iedge].sink_node = sink_node; } void t_rr_node::set_edge_switch(t_edge_size iedge, short switch_index) { + auto& node = storage_->get(id_); VTR_ASSERT(iedge < num_edges()); VTR_ASSERT(switch_index >= 0); - edges_[iedge].switch_id = switch_index; + node.edges_[iedge].switch_id = switch_index; } t_rr_rc_data::t_rr_rc_data(float Rval, float Cval) noexcept diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index 
cc9df7a42f8..af6a26ac411 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -1,5 +1,6 @@ #ifndef RR_NODE_H #define RR_NODE_H + #include "rr_node_fwd.h" #include "vpr_types.h" @@ -7,43 +8,13 @@ #include #include -/* Main structure describing one routing resource node. Everything in * - * this structure should describe the graph -- information needed only * - * to store algorithm-specific data should be stored in one of the * - * parallel rr_node_* structures. * - * * - * xlow, xhigh, ylow, yhigh: Integer coordinates (see route.c for * - * coordinate system) of the ends of this routing resource. * - * xlow = xhigh and ylow = yhigh for pins or for segments of * - * length 1. These values are used to decide whether or not this * - * node should be added to the expansion heap, based on things * - * like whether it's outside the net bounding box or is moving * - * further away from the target, etc. * - * type: What is this routing resource? * - * ptc_num: Pin, track or class number, depending on rr_node type. * - * Needed to properly draw. * - * cost_index: An integer index into the table of routing resource indexed * - * data t_rr_index_data (this indirection allows quick dynamic * - * changes of rr base costs, and some memory storage savings for * - * fields that have only a few distinct values). * - * capacity: Capacity of this node (number of routes that can use it). * - * num_edges: Number of edges exiting this node. That is, the number * - * of nodes to which it connects. * - * edges[0..num_edges-1]: Array of indices of the neighbours of this * - * node. * - * switches[0..num_edges-1]: Array of switch indexes for each of the * - * edges leaving this node. * - * * - * direction: if the node represents a track, this field * - * indicates the direction of the track. Otherwise * - * the value contained in the field should be * - * ignored. * - * side: The side of a grid location where an IPIN or OPIN is located. 
* - * This field is valid only for IPINs and OPINs and should be ignored * - * otherwise. */ class t_rr_node { public: //Types + t_rr_node(t_rr_node_storage* storage, RRNodeId id) + : storage_(storage) + , id_(id) {} + //An iterator that dereferences to an edge index // //Used inconjunction with vtr::Range to return ranges of edge indices @@ -72,25 +43,19 @@ class t_rr_node { typedef vtr::Range edge_idx_range; public: //Accessors - t_rr_type type() const { return type_; } + t_rr_type type() const; const char* type_string() const; /* Retrieve type as a string */ edge_idx_range edges() const { return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges())); } edge_idx_range configurable_edges() const { return vtr::make_range(edge_idx_iterator(0), edge_idx_iterator(num_edges() - num_non_configurable_edges())); } edge_idx_range non_configurable_edges() const { return vtr::make_range(edge_idx_iterator(num_edges() - num_non_configurable_edges()), edge_idx_iterator(num_edges())); } - t_edge_size num_edges() const { return num_edges_; } - t_edge_size num_configurable_edges() const { return num_edges() - num_non_configurable_edges(); } - t_edge_size num_non_configurable_edges() const { return num_non_configurable_edges_; } + t_edge_size num_edges() const; + t_edge_size num_configurable_edges() const; + t_edge_size num_non_configurable_edges() const; - int edge_sink_node(t_edge_size iedge) const { - VTR_ASSERT_SAFE(iedge < num_edges()); - return edges_[iedge].sink_node; - } - short edge_switch(t_edge_size iedge) const { - VTR_ASSERT_SAFE(iedge < num_edges()); - return edges_[iedge].switch_id; - } + int edge_sink_node(t_edge_size iedge) const; + short edge_switch(t_edge_size iedge) const; bool edge_is_configurable(t_edge_size iedge) const; t_edge_size fan_in() const; @@ -157,43 +122,21 @@ class t_rr_node { void set_direction(e_direction); void set_side(e_side); - private: //Types - //The edge information is stored in a structure to economize on the number of 
pointers held - //by t_rr_node (to save memory), and is not exposed externally - struct t_rr_edge { - int sink_node = -1; //The ID of the sink RR node associated with this edge - short switch_id = -1; //The ID of the switch type this edge represents - }; + void next_node() { + id_ = RRNodeId((size_t)(id_) + 1); + } + + RRNodeId id() const { + return id_; + } + + void prev_node() { + id_ = RRNodeId((size_t)(id_)-1); + } private: //Data - //Note: we use a plain array and use small types for sizes to save space vs std::vector - // (using std::vector's nearly doubles the size of the class) - std::unique_ptr edges_ = nullptr; - t_edge_size num_edges_ = 0; - t_edge_size edges_capacity_ = 0; - uint8_t num_non_configurable_edges_ = 0; - - int8_t cost_index_ = -1; - int16_t rc_index_ = -1; - - int16_t xlow_ = -1; - int16_t ylow_ = -1; - int16_t xhigh_ = -1; - int16_t yhigh_ = -1; - - t_rr_type type_ = NUM_RR_TYPES; - union { - e_direction direction; //Valid only for CHANX/CHANY - e_side side; //Valid only for IPINs/OPINs - } dir_side_; - - union { - int16_t pin_num; - int16_t track_num; - int16_t class_num; - } ptc_; - t_edge_size fan_in_ = 0; - uint16_t capacity_ = 0; + t_rr_node_storage* storage_; + RRNodeId id_; }; /* Data that is pointed to by the .cost_index member of t_rr_node. 
It's * @@ -261,4 +204,6 @@ struct t_rr_rc_data { */ short find_create_rr_rc_data(const float R, const float C); +#include "rr_node_impl.h" + #endif diff --git a/vpr/src/route/rr_node_fwd.h b/vpr/src/route/rr_node_fwd.h index 1711f80c780..c3c772e24c1 100644 --- a/vpr/src/route/rr_node_fwd.h +++ b/vpr/src/route/rr_node_fwd.h @@ -1,7 +1,7 @@ #ifndef RR_NODE_FWD_H #define RR_NODE_FWD_H + #include "vtr_strong_id.h" -#include "rr_node.h" /* * StrongId's for the t_rr_node class @@ -9,6 +9,8 @@ //Forward declaration class t_rr_node; +class t_rr_node_storage; +class node_idx_iterator; //Type tags for Ids struct rr_node_id_tag; diff --git a/vpr/src/route/rr_node_impl.h b/vpr/src/route/rr_node_impl.h new file mode 100644 index 00000000000..49b7ca32577 --- /dev/null +++ b/vpr/src/route/rr_node_impl.h @@ -0,0 +1,163 @@ +#ifndef _RR_NODE_IMPL_H_ +#define _RR_NODE_IMPL_H_ + +#include "rr_node.h" +#include "rr_node_storage.h" + +#include "vpr_error.h" + +class node_idx_iterator : public std::iterator { + public: + node_idx_iterator(t_rr_node value) + : value_(value) {} + + iterator operator++() { + value_.next_node(); + return *this; + } + iterator operator--() { + value_.prev_node(); + return *this; + } + reference operator*() const { return value_; } + pointer operator->() const { return &value_; } + + friend bool operator==(const node_idx_iterator lhs, const node_idx_iterator rhs) { return lhs.value_.id() == rhs.value_.id(); } + friend bool operator!=(const node_idx_iterator lhs, const node_idx_iterator rhs) { return !(lhs == rhs); } + + private: + t_rr_node value_; +}; + +inline node_idx_iterator t_rr_node_storage::begin() const { + return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(0))); +} + +inline node_idx_iterator t_rr_node_storage::end() const { + return node_idx_iterator(t_rr_node(const_cast(this), RRNodeId(size()))); +} + +inline const t_rr_node t_rr_node_storage::operator[](size_t idx) const { + return t_rr_node(const_cast(this), RRNodeId(idx)); +} + 
+inline t_rr_node t_rr_node_storage::operator[](size_t idx) { + return t_rr_node(this, RRNodeId(idx)); +} + +inline const t_rr_node t_rr_node_storage::at(size_t idx) const { + VTR_ASSERT(idx < storage_.size()); + return t_rr_node(const_cast(this), RRNodeId(idx)); +} + +inline t_rr_node t_rr_node_storage::at(size_t idx) { + VTR_ASSERT(idx < storage_.size()); + return t_rr_node(this, RRNodeId(idx)); +} + +inline const t_rr_node t_rr_node_storage::front() const { + return t_rr_node(const_cast(this), RRNodeId(0)); +} +inline t_rr_node t_rr_node_storage::front() { + return t_rr_node(this, RRNodeId(0)); +} + +inline const t_rr_node t_rr_node_storage::back() const { + return t_rr_node(const_cast(this), RRNodeId(size() - 1)); +} +inline t_rr_node t_rr_node_storage::back() { + return t_rr_node(this, RRNodeId(size() - 1)); +} + +inline t_rr_type t_rr_node::type() const { + return storage_->get(id_).type_; +} + +inline t_edge_size t_rr_node::num_edges() const { + return storage_->get(id_).num_edges_; +} + +inline t_edge_size t_rr_node::num_non_configurable_edges() const { + return storage_->get(id_).num_non_configurable_edges_; +} + +inline t_edge_size t_rr_node::num_configurable_edges() const { + return num_edges() - num_non_configurable_edges(); +} + +inline int t_rr_node::edge_sink_node(t_edge_size iedge) const { + return storage_->get(id_).edges_.get()[iedge].sink_node; +} +inline short t_rr_node::edge_switch(t_edge_size iedge) const { + return storage_->get(id_).edges_.get()[iedge].switch_id; +} + +inline t_edge_size t_rr_node::fan_in() const { + return storage_->get(id_).fan_in_; +} + +inline short t_rr_node::xlow() const { + return storage_->get(id_).xlow_; +} +inline short t_rr_node::ylow() const { + return storage_->get(id_).ylow_; +} +inline short t_rr_node::xhigh() const { + return storage_->get(id_).xhigh_; +} +inline short t_rr_node::yhigh() const { + return storage_->get(id_).yhigh_; +} + +inline short t_rr_node::capacity() const { + return 
storage_->get(id_).capacity_; +} + +inline short t_rr_node::ptc_num() const { + return storage_->get(id_).ptc_.pin_num; +} + +inline short t_rr_node::pin_num() const { + if (type() != IPIN && type() != OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'pin_num' for non-IPIN/OPIN type '%s'", type_string()); + } + return storage_->get(id_).ptc_.pin_num; +} + +inline short t_rr_node::track_num() const { + if (type() != CHANX && type() != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'track_num' for non-CHANX/CHANY type '%s'", type_string()); + } + return storage_->get(id_).ptc_.track_num; +} + +inline short t_rr_node::class_num() const { + if (type() != SOURCE && type() != SINK) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'class_num' for non-SOURCE/SINK type '%s'", type_string()); + } + return storage_->get(id_).ptc_.class_num; +} + +inline short t_rr_node::cost_index() const { + return storage_->get(id_).cost_index_; +} + +inline short t_rr_node::rc_index() const { + return storage_->get(id_).rc_index_; +} + +inline e_direction t_rr_node::direction() const { + if (type() != CHANX && type() != CHANY) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'direction' for non-channel type '%s'", type_string()); + } + return storage_->get(id_).dir_side_.direction; +} + +inline e_side t_rr_node::side() const { + if (type() != IPIN && type() != OPIN) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Attempted to access RR node 'side' for non-IPIN/OPIN type '%s'", type_string()); + } + return storage_->get(id_).dir_side_.side; +} + +#endif /* _RR_NODE_IMPL_H_ */ diff --git a/vpr/src/route/rr_node_storage.h b/vpr/src/route/rr_node_storage.h new file mode 100644 index 00000000000..ed55334ceb1 --- /dev/null +++ b/vpr/src/route/rr_node_storage.h @@ -0,0 +1,133 @@ +#ifndef _RR_NODE_STORAGE_ +#define _RR_NODE_STORAGE_ + +#include "rr_node_fwd.h" + +/* Main structure describing one routing resource node. 
Everything in * + * this structure should describe the graph -- information needed only * + * to store algorithm-specific data should be stored in one of the * + * parallel rr_node_* structures. * + * * + * xlow, xhigh, ylow, yhigh: Integer coordinates (see route.c for * + * coordinate system) of the ends of this routing resource. * + * xlow = xhigh and ylow = yhigh for pins or for segments of * + * length 1. These values are used to decide whether or not this * + * node should be added to the expansion heap, based on things * + * like whether it's outside the net bounding box or is moving * + * further away from the target, etc. * + * type: What is this routing resource? * + * ptc_num: Pin, track or class number, depending on rr_node type. * + * Needed to properly draw. * + * cost_index: An integer index into the table of routing resource indexed * + * data t_rr_index_data (this indirection allows quick dynamic * + * changes of rr base costs, and some memory storage savings for * + * fields that have only a few distinct values). * + * capacity: Capacity of this node (number of routes that can use it). * + * num_edges: Number of edges exiting this node. That is, the number * + * of nodes to which it connects. * + * edges[0..num_edges-1]: Array of indices of the neighbours of this * + * node. * + * switches[0..num_edges-1]: Array of switch indexes for each of the * + * edges leaving this node. * + * * + * direction: if the node represents a track, this field * + * indicates the direction of the track. Otherwise * + * the value contained in the field should be * + * ignored. * + * side: The side of a grid location where an IPIN or OPIN is located. * + * This field is valid only for IPINs and OPINs and should be ignored * + * otherwise. 
*/
+struct t_rr_node_data {
+    //The edge information is stored in a structure to economize on the number of pointers held
+    //per node (to save memory); t_rr_node reaches these entries through t_rr_node_storage::get()
+    struct t_rr_edge {
+        int sink_node = -1;   //The ID of the sink RR node associated with this edge
+        short switch_id = -1; //The ID of the switch type this edge represents
+    };
+
+    //Note: we use a plain array and use small types for sizes to save space vs std::vector
+    //      (using std::vector's nearly doubles the size of the class)
+    std::unique_ptr<t_rr_edge[]> edges_ = nullptr; //Owning array of edges_capacity_ slots; the first num_edges_ are in use
+    t_edge_size num_edges_ = 0;                    //Number of valid entries in edges_
+    t_edge_size edges_capacity_ = 0;               //Allocated length of edges_ (t_rr_node::add_edge() doubles it when full)
+    uint8_t num_non_configurable_edges_ = 0;       //Set by t_rr_node::partition_edges(), which orders those edges last
+
+    int8_t cost_index_ = -1;
+    int16_t rc_index_ = -1;
+
+    int16_t xlow_ = -1;
+    int16_t ylow_ = -1;
+    int16_t xhigh_ = -1;
+    int16_t yhigh_ = -1;
+
+    t_rr_type type_ = NUM_RR_TYPES; //Stays NUM_RR_TYPES until t_rr_node::set_type() stores a real type
+    union {
+        e_direction direction; //Valid only for CHANX/CHANY
+        e_side side;           //Valid only for IPINs/OPINs
+    } dir_side_;
+
+    union {
+        int16_t pin_num;   //Checked accessor requires IPIN/OPIN
+        int16_t track_num; //Checked accessor requires CHANX/CHANY
+        int16_t class_num; //Checked accessor requires SOURCE/SINK
+    } ptc_;
+    t_edge_size fan_in_ = 0;
+    uint16_t capacity_ = 0; //t_rr_node::set_capacity() asserts the new value is >= 0
+};
+
+// RR node and edge storage class.
+class t_rr_node_storage {
+  public:
+    void reserve(size_t size) { //Sizing and lifetime calls below forward directly to storage_
+        storage_.reserve(size);
+    }
+    void resize(size_t size) {
+        storage_.resize(size);
+    }
+    size_t size() const {
+        return storage_.size();
+    }
+    bool empty() const {
+        return storage_.empty();
+    }
+
+    void clear() {
+        storage_.clear();
+    }
+
+    void shrink_to_fit() {
+        storage_.shrink_to_fit();
+    }
+
+    void emplace_back() { //Appends one default-constructed t_rr_node_data; returns nothing, use back() for a handle
+        storage_.emplace_back();
+    }
+
+    node_idx_iterator begin() const; //Defined in rr_node_impl.h: iterator holding a t_rr_node for RRNodeId(0)
+
+    node_idx_iterator end() const; //Defined in rr_node_impl.h: iterator holding a t_rr_node for RRNodeId(size())
+
+    const t_rr_node operator[](size_t idx) const; //Accessors return a t_rr_node handle (storage pointer + id) by value
+    t_rr_node operator[](size_t idx);
+    const t_rr_node at(size_t idx) const; //at() additionally asserts idx < size()
+    t_rr_node at(size_t idx);
+
+    const t_rr_node front() const;
+    t_rr_node front();
+    const t_rr_node back() const; //Handle for RRNodeId(size() - 1)
+    t_rr_node back();
+
+    friend class t_rr_node; //t_rr_node is the only caller of the private get() overloads
+
+  private:
+    t_rr_node_data& get(const RRNodeId& id) { //Direct reference to the stored record for id
+        return storage_[id];
+    }
+    const t_rr_node_data& get(const RRNodeId& id) const {
+        return storage_[id];
+    }
+
+    vtr::vector<RRNodeId, t_rr_node_data> storage_; //One record per RR node, indexed by RRNodeId
+};
+
+#endif /* _RR_NODE_STORAGE_ */