diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp index 73fd7af3152..8f78785e6fe 100644 --- a/vpr/src/analytical_place/full_legalizer.cpp +++ b/vpr/src/analytical_place/full_legalizer.cpp @@ -104,7 +104,7 @@ class APClusterPlacer { // to share the code. // Clear the grid locations (stolen from initial_placement) - clear_all_grid_locs(blk_loc_registry); + blk_loc_registry.clear_all_grid_locs(); // Deal with the placement constraints. propagate_place_constraints(); diff --git a/vpr/src/base/blk_loc_registry.cpp b/vpr/src/base/blk_loc_registry.cpp index 4c2f767b0f1..3841524a450 100644 --- a/vpr/src/base/blk_loc_registry.cpp +++ b/vpr/src/base/blk_loc_registry.cpp @@ -36,7 +36,7 @@ int BlkLocRegistry::tile_pin_index(const ClusterPinId pin) const { } int BlkLocRegistry::net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index) const { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); // Get the logical pin index of pin within its logical block type ClusterPinId pin_id = cluster_ctx.clb_nlist.net_pin(net_id, net_pin_index); @@ -45,22 +45,22 @@ int BlkLocRegistry::net_pin_to_tile_pin_index(const ClusterNetId net_id, int net } void BlkLocRegistry::set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); const std::string& block_name = cluster_ctx.clb_nlist.block_name(blk_id); - //Check if block location is out of range of grid dimensions + // Check if block location is out of range of grid dimensions if (location.x < 0 || location.x > int(device_ctx.grid.width() - 1) || location.y < 0 || location.y > int(device_ctx.grid.height() - 1)) { VPR_THROW(VPR_ERROR_PLACE, "Block %s with ID %d is out of range at location (%d, %d). 
\n", block_name.c_str(), blk_id, location.x, location.y); } - //Set the location of the block + // Set the location of the block block_locs_[blk_id].loc = location; - //Check if block is at an illegal location + // Check if block is at an illegal location auto physical_tile = device_ctx.grid.get_physical_type({location.x, location.y, location.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); @@ -77,13 +77,71 @@ void BlkLocRegistry::set_block_location(ClusterBlockId blk_id, const t_pl_loc& l location.layer); } - //Mark the grid location and usage of the block + // Mark the grid location and usage of the block grid_blocks_.set_block_at_location(location, blk_id); grid_blocks_.increment_usage({location.x, location.y, location.layer}); place_sync_external_block_connections(blk_id); } +void BlkLocRegistry::clear_all_grid_locs() { + const auto& device_ctx = g_vpr_ctx.device(); + + std::unordered_set blk_types_to_be_cleared; + const auto& logical_block_types = device_ctx.logical_block_types; + + // Insert all the logical block types into the set except the empty type + // clear_block_type_grid_locs does not expect empty type to be among given types + for (const t_logical_block_type& logical_type : logical_block_types) { + if (!is_empty_type(&logical_type)) { + blk_types_to_be_cleared.insert(logical_type.index); + } + } + + clear_block_type_grid_locs(blk_types_to_be_cleared); +} + +void BlkLocRegistry::clear_block_type_grid_locs(const std::unordered_set& unplaced_blk_types_index) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + bool clear_all_block_types = false; + + /* check if all types should be cleared + * logical_block_types contain empty type, needs to be ignored. 
+ * Not having any type in unplaced_blk_types_index means that it is the first iteration, hence all grids needs to be cleared + */ + if (unplaced_blk_types_index.size() == device_ctx.logical_block_types.size() - 1) { + clear_all_block_types = true; + } + + /* We'll use the grid to record where everything goes. Initialize to the grid has no + * blocks placed anywhere. + */ + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + const t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int itype = type->index; + if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { + grid_blocks_.set_usage({i, j, layer_num}, 0); + for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { + grid_blocks_.set_block_at_location({i, j, k, layer_num}, ClusterBlockId::INVALID()); + } + } + } + } + } + + // Similarly, mark all blocks as not being placed yet. 
+ for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + int blk_type = cluster_ctx.clb_nlist.block_type(blk_id)->index; + if (clear_all_block_types || unplaced_blk_types_index.count(blk_type)) { + block_locs_[blk_id].loc = t_pl_loc(); + } + } +} + void BlkLocRegistry::place_sync_external_block_connections(ClusterBlockId iblk) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& clb_nlist = cluster_ctx.clb_nlist; @@ -119,7 +177,7 @@ void BlkLocRegistry::place_sync_external_block_connections(ClusterBlockId iblk) } void BlkLocRegistry::apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); VTR_ASSERT_DEBUG(expected_transaction_ == e_expected_transaction::APPLY); @@ -177,7 +235,7 @@ void BlkLocRegistry::commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_af } void BlkLocRegistry::revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); VTR_ASSERT_DEBUG(expected_transaction_ == e_expected_transaction::COMMIT_REVERT); diff --git a/vpr/src/base/blk_loc_registry.h b/vpr/src/base/blk_loc_registry.h index 542bad3651d..17df9883e0e 100644 --- a/vpr/src/base/blk_loc_registry.h +++ b/vpr/src/base/blk_loc_registry.h @@ -61,6 +61,19 @@ class BlkLocRegistry { */ void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location); + /** + * @brief Initializes the grid to empty. It also initializes the location for + * all blocks to unplaced. + */ + void clear_all_grid_locs(); + + /** + * @brief Set chosen grid locations to EMPTY block id before each placement iteration + * + * @param unplaced_blk_types_index Block types that their grid locations must be cleared. 
+ */ + void clear_block_type_grid_locs(const std::unordered_set& unplaced_blk_types_index); + /** * @brief Syncs the logical block pins corresponding to the input iblk with the corresponding chosen physical tile * @param iblk cluster block ID to sync within the assigned physical tile diff --git a/vpr/src/base/setup_noc.cpp b/vpr/src/base/setup_noc.cpp index 9a7e1ff2630..ff05687c414 100644 --- a/vpr/src/base/setup_noc.cpp +++ b/vpr/src/base/setup_noc.cpp @@ -133,7 +133,7 @@ void create_noc_routers(const t_noc_inf& noc_info, const vtr::vector& noc_router_tiles) { // keep track of the router assignments (store the user router id that was assigned to each physical router tile) // this is used in error checking, after determining the closest physical router for a user described router in the arch file, - // the datastructure below can be used to check if that physical router was already assigned previously + // the data structure below can be used to check if that physical router was already assigned previously std::vector router_assignments; router_assignments.resize(noc_router_tiles.size(), PHYSICAL_ROUTER_NOT_ASSIGNED); diff --git a/vpr/src/base/setup_noc.h b/vpr/src/base/setup_noc.h index 98c8834d1c6..4052ec4d940 100644 --- a/vpr/src/base/setup_noc.h +++ b/vpr/src/base/setup_noc.h @@ -58,7 +58,7 @@ struct t_noc_router_tile_position { * @brief Based on the NoC information provided by the user in the architecture * description file, a NoC model is created. The model defines the * constraints of the NoC as well as its layout on the FPGA device. - * The datastructure used to define the model is "NocStorage" and that + * The data structure used to define the model is "NocStorage" and that * is created here and stored within the noc_ctx. 
* * @param arch Contains the parsed information from the architecture diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index cee5d919c5b..9b45cf46b9e 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -554,7 +554,7 @@ void vpr_setup_clock_networks(t_vpr_setup& vpr_setup, const t_arch& Arch) { * constraints. Additionally, the graphics state is updated * to include a NoC button to display it. * - * @param vpr_setup A datastructure that stores all the user provided option + * @param vpr_setup A data structure that stores all the user provided option * to vpr. * @param arch Contains the parsed information from the architecture * description file. @@ -1327,7 +1327,7 @@ static void free_routing() { } /** - * @brief handles the deletion of NoC related datastructures. + * @brief handles the deletion of NoC related data structures. */ static void free_noc() {} diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index d87375dc8c3..716647c7f36 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -580,7 +580,7 @@ void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) { /* Store a reference to block location variables so that other drawing * functions can access block location information without accessing * the global placement state, which is inaccessible during placement.*/ - set_graphics_blk_loc_registry_ref(blk_loc_registry); + draw_state->set_graphics_blk_loc_registry_ref(blk_loc_registry); if (!draw_state->show_graphics && !draw_state->save_graphics && draw_state->graphics_commands.empty()) @@ -1004,8 +1004,8 @@ static void highlight_blocks(double x, double y) { return; /* Nothing was found on any layer*/ } - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); VTR_ASSERT(clb_index != 
ClusterBlockId::INVALID()); @@ -1046,14 +1046,15 @@ static void highlight_blocks(double x, double y) { ClusterBlockId get_cluster_block_id_from_xy_loc(double x, double y) { t_draw_coords* draw_coords = get_draw_coords_vars(); t_draw_state* draw_state = get_draw_state_vars(); - auto clb_index = ClusterBlockId::INVALID(); - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = draw_state->get_graphics_blk_loc_registry_ref().grid_blocks(); /// determine block /// ezgl::rectangle clb_bbox; + auto clb_index = ClusterBlockId::INVALID(); + //iterate over grid z (layers) first. Start search of the block at the top layer to prioritize highlighting of blocks at higher levels during overlapping of layers. for (int layer_num = device_ctx.grid.get_num_layers() - 1; layer_num >= 0; layer_num--) { if (!draw_state->draw_layer_display[layer_num].visible) { diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 82ad456f70f..0dba0792419 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -101,9 +101,9 @@ const std::vector kelly_max_contrast_colors = { void drawplace(ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); t_draw_coords* draw_coords = get_draw_coords_vars(); - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = draw_state->get_graphics_blk_loc_registry_ref().grid_blocks(); ClusterBlockId bnum; int num_sub_tiles; @@ -224,12 +224,9 @@ void drawplace(ezgl::renderer* g) { void drawnets(ezgl::renderer* g) { t_draw_state* draw_state = 
get_draw_state_vars(); t_draw_coords* draw_coords = get_draw_coords_vars(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); - ClusterBlockId b1, b2; - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); - - float transparency_factor; float NET_ALPHA = draw_state->net_alpha; g->set_line_dash(ezgl::line_dash::none); @@ -250,7 +247,7 @@ void drawnets(ezgl::renderer* g) { continue; } - b1 = cluster_ctx.clb_nlist.net_driver_block(net_id); + ClusterBlockId b1 = cluster_ctx.clb_nlist.net_driver_block(net_id); //The layer of the net driver block driver_block_layer_num = block_locs[b1].loc.layer; @@ -262,7 +259,7 @@ void drawnets(ezgl::renderer* g) { ezgl::point2d driver_center = draw_coords->get_absolute_clb_bbox(b1, cluster_ctx.clb_nlist.block_type(b1)).center(); for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { - b2 = cluster_ctx.clb_nlist.pin_block(pin_id); + ClusterBlockId b2 = cluster_ctx.clb_nlist.pin_block(pin_id); //the layer of the pin block (net sinks) sink_block_layer_num =block_locs[b2].loc.layer; @@ -272,7 +269,7 @@ void drawnets(ezgl::renderer* g) { if (!element_visibility.visible) { continue; /* Don't Draw */ } - transparency_factor = element_visibility.alpha; + float transparency_factor = element_visibility.alpha; //Take the highest of the 2 transparency values that the user can select from the UI // Compare the current cross layer transparency to the overall Net transparency set by the user. 
@@ -800,8 +797,8 @@ void draw_placement_macros(ezgl::renderer* g) { } t_draw_coords* draw_coords = get_draw_coords_vars(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& place_ctx = g_vpr_ctx.placement(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); for (const t_pl_macro& pl_macro : place_ctx.pl_macros) { @@ -1184,8 +1181,9 @@ void draw_crit_path_elements(const std::vector& paths, const } int get_timing_path_node_layer_num(tatum::NodeId node) { - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); - auto& atom_ctx = g_vpr_ctx.atom(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); + const auto& atom_ctx = g_vpr_ctx.atom(); AtomPinId atom_pin = atom_ctx.lookup.tnode_atom_pin(node); AtomBlockId atom_block = atom_ctx.nlist.pin_block(atom_pin); @@ -1415,9 +1413,9 @@ void draw_block_pin_util() { if (draw_state->show_blk_pin_util == DRAW_NO_BLOCK_PIN_UTIL) return; - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); std::map total_input_pins; std::map total_output_pins; @@ -1475,9 +1473,8 @@ void draw_block_pin_util() { } void draw_reset_blk_colors() { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto blks = cluster_ctx.clb_nlist.blocks(); - for (auto blk : blks) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + for (auto blk : cluster_ctx.clb_nlist.blocks()) { draw_reset_blk_color(blk); } } diff --git a/vpr/src/draw/draw_global.cpp b/vpr/src/draw/draw_global.cpp index 936b0eeb4a2..391b659bbcf 100644 --- a/vpr/src/draw/draw_global.cpp +++ 
b/vpr/src/draw/draw_global.cpp @@ -26,13 +26,6 @@ static t_draw_state draw_state; */ static t_draw_coords draw_coords; -/** - * @brief Stores a reference to a PlaceLocVars to be used in the graphics code. - * @details This reference let us pass in a currently-being-optimized placement state, - * rather than using the global placement state in placement context that is valid only once placement is done - */ -static std::optional> blk_loc_registry_ref; - /*********************** Accessor Subroutines Definition ********************/ /* This accessor function returns pointer to the global variable @@ -47,12 +40,4 @@ t_draw_state* get_draw_state_vars() { return &draw_state; } -void set_graphics_blk_loc_registry_ref(const BlkLocRegistry& blk_loc_registry) { - blk_loc_registry_ref = std::ref(blk_loc_registry); -} - -const BlkLocRegistry& get_graphics_blk_loc_registry_ref() { - return blk_loc_registry_ref->get(); -} - #endif // NO_GRAPHICS diff --git a/vpr/src/draw/draw_global.h b/vpr/src/draw/draw_global.h index 9ba5d734020..f1eec3967f5 100644 --- a/vpr/src/draw/draw_global.h +++ b/vpr/src/draw/draw_global.h @@ -27,24 +27,6 @@ t_draw_coords* get_draw_coords_vars(); t_draw_state* get_draw_state_vars(); -/** - * @brief Set the reference to placement location variable. - * - * During the placement stage, this reference should point to a local object - * in the placement stage because the placement stage does not change the - * global stage in place_ctx until the end of placement. After the placement is - * done, the reference should point to the global state stored in place_ctx. - * - * @param blk_loc_registry The PlaceLocVars that the reference will point to. - */ -void set_graphics_blk_loc_registry_ref(const BlkLocRegistry& blk_loc_registry); - -/** - * @brief Returns the reference to placement block location variables. - * @return A const reference to placement block location variables. 
- */ -const BlkLocRegistry& get_graphics_blk_loc_registry_ref(); - #endif // NO_GRAPHICS #endif diff --git a/vpr/src/draw/draw_noc.cpp b/vpr/src/draw/draw_noc.cpp index e8362352292..d112cb7cae5 100644 --- a/vpr/src/draw/draw_noc.cpp +++ b/vpr/src/draw/draw_noc.cpp @@ -74,7 +74,8 @@ void draw_noc(ezgl::renderer* g) { */ void draw_noc_usage(vtr::vector& noc_link_colors) { t_draw_state* draw_state = get_draw_state_vars(); - auto& noc_ctx = g_vpr_ctx.noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); + const auto& noc_link_bandwidth_usages = draw_state->get_noc_link_bandwidth_usages_ref(); // check to see if a color map was already created previously if (draw_state->noc_usage_color_map == nullptr) { @@ -90,15 +91,15 @@ void draw_noc_usage(vtr::vector& noc_link_colors) { // represents the color to draw each noc link ezgl::color current_noc_link_color; - for (const auto& noc_link : noc_ctx.noc_model.get_noc_links()) { - NocLinkId link_id = noc_link.get_link_id(); + for (const auto& [link_id, bandwidth_usage] : noc_link_bandwidth_usages.pairs()) { // only update the color of the link if it wasn't updated previously if (noc_link_colors[link_id] == ezgl::BLACK) { // if we are here then the link was not updated previously, so assign the color here + double link_bandwidth = noc_ctx.noc_model.get_single_noc_link(link_id).get_bandwidth(); //get the current link bandwidth usage (ratio calculation) - double link_bandwidth_usage_ratio = (noc_link.get_bandwidth_usage()) / noc_link.get_bandwidth(); + double link_bandwidth_usage_ratio = bandwidth_usage / link_bandwidth; // check if the link is being overused and if it is then cap it at 1.0 if (link_bandwidth_usage_ratio > 1.0) { diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index f3457c45992..00a1208bcba 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -68,7 +68,7 @@ ezgl::rectangle draw_get_rr_chan_bbox(RRNodeId inode) { double left = 0, right = 0, top = 0, bottom = 
0; t_draw_coords* draw_coords = get_draw_coords_vars(); - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; switch (rr_graph.node_type(inode)) { @@ -105,14 +105,11 @@ ezgl::rectangle draw_get_rr_chan_bbox(RRNodeId inode) { void draw_highlight_blocks_color(t_logical_block_type_ptr type, ClusterBlockId blk_id) { - int k; - ClusterBlockId fanblk; - t_draw_state* draw_state = get_draw_state_vars(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); - for (k = 0; k < type->pb_type->num_pins; k++) { /* Each pin on a CLB */ + for (int k = 0; k < type->pb_type->num_pins; k++) { /* Each pin on a CLB */ ClusterNetId net_id = cluster_ctx.clb_nlist.block_net(blk_id, k); if (net_id == ClusterNetId::INVALID()) { @@ -130,14 +127,14 @@ void draw_highlight_blocks_color(t_logical_block_type_ptr type, /* If block already highlighted, de-highlight the fanout. (the deselect case)*/ draw_state->net_color[net_id] = ezgl::BLACK; for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { - fanblk = cluster_ctx.clb_nlist.pin_block(pin_id); + ClusterBlockId fanblk = cluster_ctx.clb_nlist.pin_block(pin_id); draw_reset_blk_color(fanblk); } } else { /* Highlight the fanout */ draw_state->net_color[net_id] = DRIVES_IT_COLOR; for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { - fanblk = cluster_ctx.clb_nlist.pin_block(pin_id); + ClusterBlockId fanblk = cluster_ctx.clb_nlist.pin_block(pin_id); draw_state->set_block_color(fanblk, DRIVES_IT_COLOR); } } @@ -145,12 +142,12 @@ void draw_highlight_blocks_color(t_logical_block_type_ptr type, if (draw_state->block_color(blk_id) == SELECTED_COLOR) { /* If block already highlighted, de-highlight the fanin. 
(the deselect case)*/ draw_state->net_color[net_id] = ezgl::BLACK; - fanblk = cluster_ctx.clb_nlist.net_driver_block(net_id); /* DRIVER to net */ + ClusterBlockId fanblk = cluster_ctx.clb_nlist.net_driver_block(net_id); /* DRIVER to net */ draw_reset_blk_color(fanblk); } else { /* Highlight the fanin */ draw_state->net_color[net_id] = DRIVEN_BY_IT_COLOR; - fanblk = cluster_ctx.clb_nlist.net_driver_block(net_id); /* DRIVER to net */ + ClusterBlockId fanblk = cluster_ctx.clb_nlist.net_driver_block(net_id); /* DRIVER to net */ draw_state->set_block_color(fanblk, DRIVEN_BY_IT_COLOR); } } @@ -211,7 +208,7 @@ void highlight_nets(char* message, RRNodeId hit_node, bool is_flat) { */ void draw_highlight_fan_in_fan_out(const std::set& nodes) { t_draw_state* draw_state = get_draw_state_vars(); - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; for (auto node : nodes) { @@ -267,8 +264,8 @@ void deselect_all() { // as well as clearing the highlighed sub-block t_draw_state* draw_state = get_draw_state_vars(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); /* Create some colour highlighting */ for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { diff --git a/vpr/src/draw/draw_types.cpp b/vpr/src/draw/draw_types.cpp index d2ed9e47c3c..b3909c5fd7c 100644 --- a/vpr/src/draw/draw_types.cpp +++ b/vpr/src/draw/draw_types.cpp @@ -87,8 +87,9 @@ float t_draw_coords::get_tile_height() { } ezgl::rectangle t_draw_coords::get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode) { - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); + const auto& cluster_ctx = 
g_vpr_ctx.clustering(); return get_pb_bbox(block_locs[clb_index].loc.layer, block_locs[clb_index].loc.x, @@ -99,7 +100,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(ClusterBlockId clb_index, const t_pb_ } ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type, const t_pb_graph_node& pb_gnode) { - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); ezgl::rectangle result = blk_type_info.get_pb_bbox(pb_gnode); @@ -118,7 +119,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_ } ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); auto& pb_gnode = *logical_block_type->pb_graph_head; @@ -141,7 +142,7 @@ ezgl::rectangle t_draw_coords::get_absolute_pb_bbox(const ClusterBlockId clb_ind ezgl::rectangle result = this->get_pb_bbox(clb_index, *pb_gnode); // go up the graph, adding the parent bboxes to the result, - // ie. make it relative to one level higher each time. + // i.e. make it relative to one level higher each time. 
while (pb_gnode && !pb_gnode->is_root()) { ezgl::rectangle parents_bbox = this->get_pb_bbox(clb_index, *pb_gnode->parent_pb_graph_node); result += parents_bbox.bottom_left(); @@ -152,7 +153,8 @@ ezgl::rectangle t_draw_coords::get_absolute_pb_bbox(const ClusterBlockId clb_ind } ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(const ClusterBlockId clb_index, const t_logical_block_type_ptr block_type) { - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); t_pl_loc loc = block_locs[clb_index].loc; return get_pb_bbox(loc.layer, loc.x, loc.y, loc.sub_tile, block_type); diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index 3779af4cf79..1f6689e63f4 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -310,10 +310,61 @@ struct t_draw_state { std::vector> colored_locations; + /** + * @brief Set the reference to placement location variable. + * + * @details During the placement stage, this reference should point to a + * local object in the placement stage because the placement stage does not change + * the global stage in place_ctx until the end of placement. After the placement + * is done, the reference should point to the global state stored in place_ctx. + * + * @param blk_loc_registry The PlaceLocVars that the reference will point to. + */ + void set_graphics_blk_loc_registry_ref(const BlkLocRegistry& blk_loc_registry) { + blk_loc_registry_ref_ = std::ref(blk_loc_registry); + } + + /** + * @brief Returns the reference to placement block location variables. + * @return A const reference to placement block location variables. + */ + const BlkLocRegistry& get_graphics_blk_loc_registry_ref() const { + return blk_loc_registry_ref_->get(); + } + + /** + * @brief Set the internal reference to NoC link bandwidth utilization array. 
+ * @param noc_link_bandwidth_usages The array that the internal reference will + * be pointing to. + */ + void set_noc_link_bandwidth_usages_ref(const vtr::vector& noc_link_bandwidth_usages) { + noc_link_bandwidth_usages_ref_ = noc_link_bandwidth_usages; + } + + /** + * @brief Returns the reference to NoC link bandwidth utilization array. + * @return A const reference to NoC link bandwidth utilization array. + */ + const vtr::vector& get_noc_link_bandwidth_usages_ref() const { + return noc_link_bandwidth_usages_ref_->get(); + } + private: friend void alloc_draw_structs(const t_arch* arch); vtr::vector block_color_; vtr::vector use_default_block_color_; + + /** + * @brief Stores a reference to a BlkLocRegistry to be used in the graphics code. + * @details This reference let us pass in a currently-being-optimized placement state, + * rather than using the global placement state in placement context that is valid only once placement is done + */ + std::optional> blk_loc_registry_ref_; + + /** + * @brief Stores a reference to NoC link bandwidth utilization to be used in the graphics codes. + */ + std::optional>> noc_link_bandwidth_usages_ref_; }; /* For each cluster type, this structure stores drawing diff --git a/vpr/src/draw/intra_logic_block.cpp b/vpr/src/draw/intra_logic_block.cpp index 855f2262e63..c1ee67c7590 100644 --- a/vpr/src/draw/intra_logic_block.cpp +++ b/vpr/src/draw/intra_logic_block.cpp @@ -55,16 +55,13 @@ void draw_one_logical_connection(const AtomPinId src_pin, const AtomPinId sink_p /************************* Subroutine definitions begin *********************************/ void draw_internal_alloc_blk() { - t_draw_coords* draw_coords; + t_draw_coords* draw_coords = get_draw_coords_vars(); + const auto& device_ctx = g_vpr_ctx.device(); t_pb_graph_node* pb_graph_head; - /* Call accessor function to retrieve global variables. 
*/ - draw_coords = get_draw_coords_vars(); - /* Create a vector holding coordinate information for each type of physical logic * block. */ - auto& device_ctx = g_vpr_ctx.device(); draw_coords->blk_info.resize(device_ctx.logical_block_types.size()); for (const auto& type : device_ctx.logical_block_types) { @@ -150,9 +147,9 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { if (!draw_state->show_blk_internal) { return; } - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = draw_state->get_graphics_blk_loc_registry_ref().grid_blocks(); int total_layer_num = device_ctx.grid.get_num_layers(); @@ -269,11 +266,9 @@ static void draw_internal_load_coords(int type_descrip_index, t_pb_graph_node* p */ static void draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node, int num_pb_types, int type_index, int num_pb, int pb_index, float parent_width, float parent_height, float* blk_width, float* blk_height) { - float parent_drawing_width, parent_drawing_height; - float sub_tile_x, sub_tile_y; - float child_width, child_height; - auto& device_ctx = g_vpr_ctx.device(); - const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid_blocks = draw_state->get_graphics_blk_loc_registry_ref().grid_blocks(); // get the bbox for this pb type ezgl::rectangle& pb_bbox = get_draw_coords_vars()->blk_info.at(type_descrip_index).get_pb_bbox_ref(*pb_graph_node); @@ -286,7 +281,6 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node const float FRACTION_CHILD_MARGIN_X = 0.025; const float FRACTION_CHILD_MARGIN_Y = 0.04; - double left, bot, right, top; int capacity = 
device_ctx.physical_tile_types[type_descrip_index].capacity; // TODO: this is a hack - should be fixed for the layer_num @@ -299,23 +293,23 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node } /* Draw all child-level blocks in just most of the space inside their parent block. */ - parent_drawing_width = parent_width * (1 - FRACTION_PARENT_PADDING_X * 2); - parent_drawing_height = parent_height * (NORMAL_FRACTION_PARENT_HEIGHT / capacity_divisor); + float parent_drawing_width = parent_width * (1 - FRACTION_PARENT_PADDING_X * 2); + float parent_drawing_height = parent_height * (NORMAL_FRACTION_PARENT_HEIGHT / capacity_divisor); /* The left and bottom corner (inside the parent block) of the space to draw * child blocks. */ - sub_tile_x = parent_width * FRACTION_PARENT_PADDING_X; - sub_tile_y = parent_height * FRACTION_PARENT_PADDING_BOTTOM; + float sub_tile_x = parent_width * FRACTION_PARENT_PADDING_X; + float sub_tile_y = parent_height * FRACTION_PARENT_PADDING_BOTTOM; /* Divide parent_drawing_width by the number of child types. */ - child_width = parent_drawing_width / num_pb_types; + float child_width = parent_drawing_width / num_pb_types; /* Divide parent_drawing_height by the number of instances of the pb_type. */ - child_height = parent_drawing_height / num_pb; + float child_height = parent_drawing_height / num_pb; /* The starting point to draw the physical block. */ - left = child_width * type_index + sub_tile_x + FRACTION_CHILD_MARGIN_X * child_width; - bot = child_height * pb_index + sub_tile_y + FRACTION_CHILD_MARGIN_Y * child_height; + double left = child_width * type_index + sub_tile_x + FRACTION_CHILD_MARGIN_X * child_width; + double bot = child_height * pb_index + sub_tile_y + FRACTION_CHILD_MARGIN_Y * child_height; /* Leave some space between different pb_types. 
*/ child_width *= 1 - FRACTION_CHILD_MARGIN_X * 2; @@ -323,8 +317,8 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node child_height *= 1 - FRACTION_CHILD_MARGIN_Y * 2; /* Endpoint for drawing the pb_type */ - right = left + child_width; - top = bot + child_height; + double right = left + child_width; + double top = bot + child_height; pb_bbox = ezgl::rectangle({right, top}, {left, bot}); @@ -340,8 +334,7 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezgl::rectangle& parent_bbox, const t_logical_block_type_ptr type, ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); t_draw_state* draw_state = get_draw_state_vars(); - - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); t_selected_sub_block_info& sel_sub_info = get_selected_sub_block_info(); @@ -557,9 +550,8 @@ void collect_pb_atoms_recurr(const t_pb* pb, std::vector& atoms) { void draw_logical_connections(ezgl::renderer* g) { const t_selected_sub_block_info& sel_subblk_info = get_selected_sub_block_info(); t_draw_state* draw_state = get_draw_state_vars(); - - auto& atom_ctx = g_vpr_ctx.atom(); - auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); g->set_line_dash(ezgl::line_dash::none); @@ -671,8 +663,8 @@ void find_pin_index_at_model_scope(const AtomPinId pin_id, const AtomBlockId blk # ifndef NO_GRAPHICS /** * Draws ONE logical connection from src_pin in src_lblk to sink_pin in sink_lblk. - * The *_abs_bbox parameters are for mild optmization, as the absolute bbox can be calculated - * more effeciently elsewhere. 
+ * The *_abs_bbox parameters are for mild optimization, as the absolute bbox can be calculated + * more efficiently elsewhere. */ void draw_one_logical_connection(const AtomPinId src_pin, const AtomPinId sink_pin, ezgl::renderer* g) { ezgl::point2d src_point = atom_pin_draw_coord(src_pin); @@ -681,7 +673,7 @@ void draw_one_logical_connection(const AtomPinId src_pin, const AtomPinId sink_p // draw a link connecting the pins. g->draw_line(src_point, sink_point); - auto& atom_ctx = g_vpr_ctx.atom(); + const auto& atom_ctx = g_vpr_ctx.atom(); if (atom_ctx.lookup.atom_clb(atom_ctx.nlist.pin_block(src_pin)) == atom_ctx.lookup.atom_clb(atom_ctx.nlist.pin_block(sink_pin))) { // if they are in the same clb, put one arrow in the center float center_x = (src_point.x + sink_point.x) / 2; diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 00fb05e3ab2..0becc4917a9 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -147,9 +147,11 @@ void calculate_cost_callback(GtkWidget* /*widget*/, GtkWidget* grid) { } bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); - const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& grid_blocks = draw_state->get_graphics_blk_loc_registry_ref().grid_blocks(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); //if the block is not found if ((!cluster_ctx.clb_nlist.valid_block_id(ClusterBlockId(block_id)))) { @@ -175,14 +177,14 @@ bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { //If the destination block is user constrained, abort this swap ClusterBlockId b_to = grid_blocks.block_at_location(to); if (b_to) { - if 
(get_graphics_blk_loc_registry_ref().block_locs()[b_to].is_fixed) { + if (block_locs[b_to].is_fixed) { invalid_breakpoint_entry_window("Block is fixed"); return false; } } //If the block requested is already in that location. - t_pl_loc current_block_loc = get_graphics_blk_loc_registry_ref().block_locs()[block_id].loc; + t_pl_loc current_block_loc = block_locs[block_id].loc; if (to.x == current_block_loc.x && to.y == current_block_loc.y && to.sub_tile == current_block_loc.sub_tile) { invalid_breakpoint_entry_window("The block is currently in this location"); return false; diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 756dfd9d976..c0aeef85f84 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -282,8 +282,9 @@ void auto_zoom_rr_node(RRNodeId rr_node_id) { */ void highlight_cluster_block(ClusterBlockId clb_index) { char msg[vtr::bufsize]; - auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + t_draw_state* draw_state = get_draw_state_vars(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = draw_state->get_graphics_blk_loc_registry_ref().block_locs(); /// determine block /// ezgl::rectangle clb_bbox; @@ -320,8 +321,8 @@ void highlight_cluster_block(ClusterBlockId clb_index) { * @return false | If sub-block not found (impossible in search case) or not shown at current zoom lvl */ bool highlight_atom_block(AtomBlockId atom_blk, ClusterBlockId cl_blk, ezgl::application* app) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cl_ctx = g_vpr_ctx.clustering(); + const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& cl_ctx = g_vpr_ctx.clustering(); t_pb* pb = cl_ctx.clb_nlist.block_pb(cl_blk); //Getting the pb* for the atom block diff --git a/vpr/src/noc/bfs_routing.cpp b/vpr/src/noc/bfs_routing.cpp index b80551958d6..a72ec6b29e4 100644 --- a/vpr/src/noc/bfs_routing.cpp +++ b/vpr/src/noc/bfs_routing.cpp @@ -28,9 +28,9 @@ 
void BFSRouting::route_flow(NocRouterId src_router_id, /* * As the routing goes through the NoC, each router visited has a * corresponding link that was used to reach the router. This - * datastructure stores the link that was used to visit each router in + * data structure stores the link that was used to visit each router in * the NoC. - * Once the destination router has been found. This datastructure can be used to + * Once the destination router has been found. This data structure can be used to * trace the path back to the source router. */ std::unordered_map router_parent_link; diff --git a/vpr/src/noc/noc_data_types.h b/vpr/src/noc/noc_data_types.h index a75f387270b..6317db4ae09 100644 --- a/vpr/src/noc/noc_data_types.h +++ b/vpr/src/noc/noc_data_types.h @@ -3,7 +3,7 @@ /** * @file - * @brief This file contains datatype definitions which are used by the NoC datastructures. + * @brief This file contains datatype definitions which are used by the NoC data structures. * */ diff --git a/vpr/src/noc/noc_link.cpp b/vpr/src/noc/noc_link.cpp index 28340ff96d8..f272023dbfc 100644 --- a/vpr/src/noc/noc_link.cpp +++ b/vpr/src/noc/noc_link.cpp @@ -6,7 +6,6 @@ NocLink::NocLink(NocLinkId link_id, NocRouterId source, NocRouterId sink, : id(link_id) , source_router(source) , sink_router(sink) - , bandwidth_usage(0.0) , bandwidth(bw) , latency(lat) { } @@ -19,10 +18,6 @@ NocRouterId NocLink::get_sink_router() const { return sink_router; } -double NocLink::get_bandwidth_usage() const { - return bandwidth_usage; -} - //setters void NocLink::set_source_router(NocRouterId source) { source_router = source; @@ -32,10 +27,6 @@ void NocLink::set_sink_router(NocRouterId sink) { sink_router = sink; } -void NocLink::set_bandwidth_usage(double new_bandwidth_usage) { - bandwidth_usage = new_bandwidth_usage; -} - void NocLink::set_bandwidth(double new_bandwidth) { bandwidth = new_bandwidth; } @@ -44,22 +35,6 @@ double NocLink::get_bandwidth() const { return bandwidth; } -double 
NocLink::get_congested_bandwidth() const { - double congested_bandwidth = bandwidth_usage - bandwidth; - congested_bandwidth = std::max(congested_bandwidth, 0.0); - - VTR_ASSERT(congested_bandwidth >= 0.0); - return congested_bandwidth; -} - -double NocLink::get_congested_bandwidth_ratio() const { - double congested_bw = get_congested_bandwidth(); - double congested_bw_ratio = congested_bw / get_bandwidth(); - - VTR_ASSERT(congested_bw_ratio >= 0.0); - return congested_bw_ratio; -} - double NocLink::get_latency() const { return latency; } diff --git a/vpr/src/noc/noc_link.h b/vpr/src/noc/noc_link.h index 38d8cec42de..3f56e763726 100644 --- a/vpr/src/noc/noc_link.h +++ b/vpr/src/noc/noc_link.h @@ -49,7 +49,6 @@ class NocLink { NocRouterId source_router; /*!< The router which uses this link as an outgoing edge*/ NocRouterId sink_router; /*!< The router which uses this link as an incoming edge*/ - double bandwidth_usage; /*!< Represents the bandwidth of the data being transmitted on the link. Units in bits-per-second(bps)*/ double bandwidth; /*!< Represents the maximum bits per second that can be transmitted over the link without causing congestion*/ double latency; /*!< The zero-load latency of this link in seconds.*/ @@ -73,31 +72,12 @@ class NocLink { */ NocRouterId get_sink_router() const; - /** - * @brief Provides the size of the data (bandwidth) being currently transmitted using the link. - * @return A numeric value of the bandwidth usage of the link - */ - double get_bandwidth_usage() const; - /** * @brief Returns the maximum bandwidth that the link can carry without congestion. * @return A numeric value of the bandwidth capacity of the link */ double get_bandwidth() const; - /** - * @brief Calculates the extent to which the current bandwidth utilization - * exceeds the link capacity. Any positive value means the link is congested. 
- * @return A numeric value of the bandwidth over-utilization in the link - */ - double get_congested_bandwidth() const; - - /** - * @brief Computes the congested bandwidth to bandwidth capacity ratio. - * @return The congested bandwidth to bandwidth capacity of the link. - */ - double get_congested_bandwidth_ratio() const; - /** * @brief Returns the zero-load latency of the link. * @return double Zero-load latency of the link. @@ -126,15 +106,6 @@ class NocLink { */ void set_sink_router(NocRouterId sink); - /** - * @brief Can modify the bandwidth usage of the link. It is expected that when the NoC is being placed - * the traffic flows will be re-routed multiple times. So the links will end up being used and un-used - * by different traffic flows and the bandwidths of the links will correspondingly change. This function - * can be used to make those changes - * @param new_bandwidth_usage The new value of the bandwidth usage of the link - */ - void set_bandwidth_usage(double new_bandwidth_usage); - /** * @brief Sets the bandwidth capacity of the link. This function should be used when * global NoC data structures are created and populated. The bandwidth capacity is used diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp index cbf32c802c9..58e79d46b03 100644 --- a/vpr/src/noc/noc_storage.cpp +++ b/vpr/src/noc/noc_storage.cpp @@ -106,11 +106,11 @@ bool NocStorage::is_noc_3d() const { // setters for the NoC void NocStorage::add_router(int id, - int grid_position_x, int grid_posistion_y, int layer_position, + int grid_position_x, int grid_position_y, int layer_position, double latency) { VTR_ASSERT_MSG(!built_noc, "NoC already built, cannot modify further."); - router_storage.emplace_back(id, grid_position_x, grid_posistion_y, layer_position, latency); + router_storage.emplace_back(id, grid_position_x, grid_position_y, layer_position, latency); /* Get the corresponding NocRouterId for the newly added router and * add it to the conversion table. 
@@ -123,7 +123,7 @@ void NocStorage::add_router(int id, /* need to associate the current router with its grid position */ // get the key to identify the current router - int router_key = generate_router_key_from_grid_location(grid_position_x, grid_posistion_y, layer_position); + int router_key = generate_router_key_from_grid_location(grid_position_x, grid_position_y, layer_position); grid_location_to_router_id.insert(std::pair(router_key, converted_id)); } @@ -205,8 +205,6 @@ bool NocStorage::remove_link(NocRouterId src_router_id, NocRouterId sink_router_ link_storage[link_to_be_removed_id].set_source_router(NocRouterId::INVALID()); link_storage[link_to_be_removed_id].set_sink_router(NocRouterId::INVALID()); - link_storage[link_to_be_removed_id].set_bandwidth_usage(-1); - } // if a link was not removed then throw warning message diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h index dbaa9cfdb5b..89dcf87bbac 100644 --- a/vpr/src/noc/noc_storage.h +++ b/vpr/src/noc/noc_storage.h @@ -71,7 +71,7 @@ class NocStorage { * in the architecture file. This ID system will be different than the * NocRouterIds assigned to each router. The user ID system will be * arbitrary but the internal ID system used here will start at 0 and - * are dense since it is used to index the routers. The datastructure + * are dense since it is used to index the routers. The data structure * below is a conversiont able that maps the user router IDs to the * corresponding internal ones. */ @@ -82,12 +82,12 @@ class NocStorage { * location. During placement, when logical routers are moved to * different hard routers, only the grid location of where the * logical router was moved is known. - * Using this datastructure, the grid location can be used to + * Using this data structure, the grid location can be used to * identify the corresponding hard router block positioned at that grid * location. 
The NocROuterId uniquely identifies hard router blocks and * can be used to retrieve the hard router block information using * the router_storage data structure above. This can also be used to - * access the connectivity graph datastructure above. + * access the connectivity graph data structure above. * * It is important to know the specific hard router block because * without it we cannot determine the starting/end points of the traffic @@ -97,7 +97,7 @@ class NocStorage { * * The intended use is when trying to re-route a traffic flow. The current * location of a logical router block can be used in conjunction with this - * datastructure to identify the corresponding hard router block. + * data structure to identify the corresponding hard router block. * */ std::unordered_map grid_location_to_router_id; @@ -170,14 +170,14 @@ class NocStorage { */ int layer_num_grid_locs; - // prevent "copying" of this object - NocStorage(const NocStorage&) = delete; - void operator=(const NocStorage&) = delete; - public: // default constructor (clear all the elements in the vectors) NocStorage(); + // prevent "copying" of this object + NocStorage(const NocStorage&) = delete; + void operator=(const NocStorage&) = delete; + // getters for the NoC /** @@ -379,6 +379,10 @@ class NocStorage { * tile that this router represents. * @param grid_position_y The vertical position on the FPGA of the physical * tile that this router represents. + * @param layer_position The layer where the physical tile that this router + * represents is located. + * @param latency The zero-load latency that a traffic flow will experience + * when it is routed through this router. */ void add_router(int id, int grid_position_x, int grid_position_y, int layer_position, @@ -464,7 +468,7 @@ class NocStorage { void finished_building_noc(); /** - * @brief Resets the NoC by clearing all internal datastructures. + * @brief Resets the NoC by clearing all internal data structures. 
* This includes deleting all routers and links. Also all internal * IDs are removed (the is conversion table is cleared). It is * recommended to run this function before building the NoC. @@ -489,7 +493,7 @@ class NocStorage { int convert_router_id(NocRouterId id) const; /** - * @brief The datastructure that stores the outgoing links to each + * @brief The data structure that stores the outgoing links to each * router is an 2-D Vector. When processing the links, they can be * outgoing from any router in the NoC. Therefore the column size * of the 2-D vector needs to be the size of the number of routers @@ -527,7 +531,7 @@ class NocStorage { * @brief Generates a unique integer using the x and y coordinates of a * hard router block that can be used to identify it. This should be * used to generate the keys for the 'grid_location_to_router_id' - * datastructure. + * data structure. * * The key will be generated as follows: * key = y * device_grid.width() + x diff --git a/vpr/src/noc/noc_traffic_flows.cpp b/vpr/src/noc/noc_traffic_flows.cpp index dbfd20134b5..310eacd2f16 100644 --- a/vpr/src/noc/noc_traffic_flows.cpp +++ b/vpr/src/noc/noc_traffic_flows.cpp @@ -37,18 +37,6 @@ int NocTrafficFlows::get_number_of_routers_used_in_traffic_flows() { return traffic_flows_associated_to_router_blocks.size(); } -const std::vector& NocTrafficFlows::get_traffic_flow_route(NocTrafficFlowId traffic_flow_id) const { - return traffic_flow_routes[traffic_flow_id]; -} - -std::vector& NocTrafficFlows::get_mutable_traffic_flow_route(NocTrafficFlowId traffic_flow_id) { - return traffic_flow_routes[traffic_flow_id]; -} - -const vtr::vector>& NocTrafficFlows::get_all_traffic_flow_routes() const { - return traffic_flow_routes; -} - const std::vector& NocTrafficFlows::get_router_clusters_in_netlist() const { return router_cluster_in_netlist; } @@ -104,19 +92,14 @@ void NocTrafficFlows::set_router_cluster_in_netlist(const std::vector> traffic_flow_routes; - // private functions /** @@ -138,7 
+127,7 @@ class NocTrafficFlows { * @param traffic_flow_id A unique id that represents a traffic flow. * @param associated_router_id A ClusterBlockId that represents a * router block. - * @param router_associated_traffic_flows A datastructure that stores + * @param router_associated_traffic_flows A data structure that stores * a vector of traffic flows for a given router block where the traffic * flows have the router as a source or sink within the flow. * @@ -194,37 +183,6 @@ class NocTrafficFlows { */ int get_number_of_routers_used_in_traffic_flows(); - /** - * @brief Gets the routed path of traffic flow. This cannot be - * modified externally. - * - * @param traffic_flow_id A unique identifier that represents a - * traffic flow. - * @return std::vector& A reference to the provided - * traffic flow's routed path. - */ - const std::vector& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id) const; - - /** - * @brief Gets the routed path of a traffic flow. The path - * returned can and is expected to be modified externally. - * - * @param traffic_flow_id A unique identifier that represents a - * traffic flow. - * @return std::vector& A reference to the provided - * traffic flow's vector of links used from the src to dst. - */ - std::vector& get_mutable_traffic_flow_route(NocTrafficFlowId traffic_flow_id); - - /** - * @brief Gets all routed paths for all traffic flows. This cannot be - * modified externally. - * - * @return vtr::vector>& A reference - * to the provided container that includes all traffic flow routes. - */ - const vtr::vector>& get_all_traffic_flow_routes() const; - /** * @return a vector ([0..num_logical_router-1]) where each entry gives the clusterBlockId * of a logical NoC router. Used for fast lookups in the placer. @@ -251,7 +209,7 @@ class NocTrafficFlows { * flows in the design. 
* * Finally, the newly created traffic flow is - * also added to internal datastructures that can be used to quickly + * also added to internal data structures that can be used to quickly * look up which traffic flows contain a specific router cluster block. * * @param source_router_module_name A string that represents the @@ -304,8 +262,7 @@ class NocTrafficFlows { /** * @brief Resets the class by clearing internal - * datastructures. - * + * data structures. */ void clear_traffic_flows(); @@ -331,7 +288,7 @@ class NocTrafficFlows { /** * @brief Writes out the NocTrafficFlows class information to a file. - * This includes printing out each internal datastructure information. + * This includes printing out each internal data structure information. * * @param file_name The name of the file that contains the NoC * traffic flow information diff --git a/vpr/src/noc/read_xml_noc_traffic_flows_file.h b/vpr/src/noc/read_xml_noc_traffic_flows_file.h index 7abd685d82e..2d8b348ad71 100644 --- a/vpr/src/noc/read_xml_noc_traffic_flows_file.h +++ b/vpr/src/noc/read_xml_noc_traffic_flows_file.h @@ -164,7 +164,7 @@ void verify_traffic_flow_properties(double traffic_flow_bandwidth, double max_tr * @param router_module_name The name of the router module in the design for * which the corresponding block id needs to be found. * @param cluster_ctx Global variable that contains clustering information. - * Contains a datastructure to convert a module name to + * Contains a data structure to convert a module name to * a cluster block id. * @param single_flow_tag A xml tag that contains the traffic flow information. * Passed in for error logging. @@ -197,7 +197,7 @@ ClusterBlockId get_router_module_cluster_id(const std::string& router_module_nam * @param loc_data Contains location data about the current line in the xml * file. Passed in for error logging. * @param cluster_ctx Global variable that contains clustering information. 
- * Contains a datastructure to get the logical type of a + * Contains a data structure to get the logical type of a * router cluster block. * @param noc_router_tile_type The physical type of a Noc router tile in the * FPGA. Used to check if the router block is @@ -208,7 +208,7 @@ void check_traffic_flow_router_module_type(const std::string& router_module_name /** * @brief Retrieves the physical type of a noc router tile. * - * @param device_ctx Contains the device information. Has a datastructure that + * @param device_ctx Contains the device information. Has a data structure that * can determine a tile type based on grid position on the * FPGA. * @param noc_ctx Contains the NoC information. Used to get the grid position diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp index d59db1de93c..9b4a3adeeed 100644 --- a/vpr/src/place/initial_noc_placement.cpp +++ b/vpr/src/place/initial_noc_placement.cpp @@ -12,7 +12,6 @@ #include "vtr_math.h" #include "vtr_time.h" -#include #include /** @@ -60,7 +59,8 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout * To be filled with the location where pl_macro is placed. */ static void noc_routers_anneal(const t_noc_opts& noc_opts, - BlkLocRegistry& blk_loc_registry); + BlkLocRegistry& blk_loc_registry, + NocCostHandler& noc_cost_handler); /** * @brief Returns the compressed grid of NoC. 
@@ -202,7 +202,8 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout } static void noc_routers_anneal(const t_noc_opts& noc_opts, - BlkLocRegistry& blk_loc_registry) { + BlkLocRegistry& blk_loc_registry, + NocCostHandler& noc_cost_handler) { auto& noc_ctx = g_vpr_ctx.noc(); const auto& block_locs = blk_loc_registry.block_locs(); @@ -210,10 +211,10 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, t_placer_costs costs; // Initialize NoC-related costs - costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); - std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost(); - costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); - update_noc_normalization_factors(costs); + costs.noc_cost_terms.aggregate_bandwidth = noc_cost_handler.comp_noc_aggregate_bandwidth_cost(); + std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = noc_cost_handler.comp_noc_latency_cost(); + costs.noc_cost_terms.congestion = noc_cost_handler.comp_noc_congestion_cost(); + noc_cost_handler.update_noc_normalization_factors(costs); costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts); const auto& compressed_noc_grid = get_compressed_noc_grid(); @@ -243,7 +244,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, const double prob_step = starting_prob / N_MOVES; // The checkpoint stored the placement with the lowest cost. - NoCPlacementCheckpoint checkpoint; + NoCPlacementCheckpoint checkpoint(noc_cost_handler); /* Algorithm overview: * In each iteration, one logical NoC router and a physical NoC router are selected randomly. 
@@ -270,7 +271,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, blk_loc_registry.apply_move_blocks(blocks_affected); NocCostTerms noc_delta_c; - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, block_locs); + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); double delta_cost = calculate_noc_cost(noc_delta_c, costs.noc_cost_norm_factors, noc_opts); double prob = starting_prob - i_move * prob_step; @@ -279,7 +280,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, if (move_accepted) { costs.cost += delta_cost; blk_loc_registry.commit_move_blocks(blocks_affected); - commit_noc_costs(); + noc_cost_handler.commit_noc_costs(); costs += noc_delta_c; // check if the current placement is better than the stored checkpoint if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) { @@ -287,7 +288,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, } } else { // The proposed move is rejected blk_loc_registry.revert_move_blocks(blocks_affected); - revert_noc_traffic_flow_routes(blocks_affected, block_locs); + noc_cost_handler.revert_noc_traffic_flow_routes(blocks_affected); } } } @@ -299,7 +300,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts, void initial_noc_placement(const t_noc_opts& noc_opts, const t_placer_opts& placer_opts, - BlkLocRegistry& blk_loc_registry) { + BlkLocRegistry& blk_loc_registry, + NocCostHandler& noc_cost_handler) { vtr::ScopedStartFinishTimer timer("Initial NoC Placement"); auto& noc_ctx = g_vpr_ctx.noc(); const auto& block_locs = blk_loc_registry.block_locs(); @@ -327,13 +329,13 @@ void initial_noc_placement(const t_noc_opts& noc_opts, place_noc_routers_randomly(unfixed_routers, placer_opts.seed, blk_loc_registry); // populate internal data structures to maintain route, bandwidth usage, and latencies - initial_noc_routing({}, block_locs); + noc_cost_handler.initial_noc_routing({}); // Run the simulated annealing 
optimizer for NoC routers - noc_routers_anneal(noc_opts, blk_loc_registry); + noc_routers_anneal(noc_opts, blk_loc_registry, noc_cost_handler); // check if there is any cycles - bool has_cycle = noc_routing_has_cycle(block_locs); + bool has_cycle = noc_cost_handler.noc_routing_has_cycle(); if (has_cycle) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "At least one cycle was found in NoC channel dependency graph. This may cause a deadlock " diff --git a/vpr/src/place/initial_noc_placment.h b/vpr/src/place/initial_noc_placment.h index 3a37f95969c..bcac134618c 100644 --- a/vpr/src/place/initial_noc_placment.h +++ b/vpr/src/place/initial_noc_placment.h @@ -5,6 +5,7 @@ struct t_noc_opts; struct t_placer_opts; class BlkLocRegistry; +class NocCostHandler; /** * @brief Randomly places NoC routers, then runs a quick simulated annealing @@ -17,6 +18,7 @@ class BlkLocRegistry; */ void initial_noc_placement(const t_noc_opts& noc_opts, const t_placer_opts& placer_opts, - BlkLocRegistry& blk_loc_registry); + BlkLocRegistry& blk_loc_registry, + NocCostHandler& noc_cost_handler); #endif //VTR_INITIAL_NOC_PLACEMENT_H diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 4a6bdd0c533..4cf4f6cc89f 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -13,12 +13,11 @@ #include "move_utils.h" #include "region.h" #include "directed_moves_util.h" +#include "noc_place_utils.h" #include "echo_files.h" - -#include #include -#include +#include #ifdef VERBOSE void print_clb_placement(const char* fname); @@ -33,16 +32,6 @@ static constexpr int SORT_WEIGHT_PER_FAILED_BLOCK = 10; // The amount of weight that will be added to each tile which is outside the floorplanning constraints static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100; -/** - * @brief Set chosen grid locations to EMPTY block id before each placement iteration - * - * @param unplaced_blk_types_index Block types that their grid locations must be cleared. 
- * @param blk_loc_registry Placement block location information. To be filled with the location - * where pl_macro is placed. - */ -static void clear_block_type_grid_locs(const std::unordered_set& unplaced_blk_types_index, - BlkLocRegistry& blk_loc_registry); - /** * @brief Control routine for placing a macro. * First iteration of place_marco performs the following steps to place a macro: @@ -1023,7 +1012,7 @@ static void place_all_blocks(const t_placer_opts& placer_opts, for (auto iter_no = 0; iter_no < MAX_INIT_PLACE_ATTEMPTS; iter_no++) { //clear grid for a new placement iteration - clear_block_type_grid_locs(unplaced_blk_type_in_curr_itr, blk_loc_registry); + blk_loc_registry.clear_block_type_grid_locs(unplaced_blk_type_in_curr_itr); unplaced_blk_type_in_curr_itr.clear(); // read the constraint file if the user has provided one and this is not the first attempt @@ -1097,67 +1086,6 @@ static void place_all_blocks(const t_placer_opts& placer_opts, } } -static void clear_block_type_grid_locs(const std::unordered_set& unplaced_blk_types_index, - BlkLocRegistry& blk_loc_registry) { - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& grid_blocks = blk_loc_registry.mutable_grid_blocks(); - auto& block_locs = blk_loc_registry.mutable_block_locs(); - - bool clear_all_block_types = false; - - /* check if all types should be cleared - * logical_block_types contain empty type, needs to be ignored. - * Not having any type in unplaced_blk_types_index means that it is the first iteration, hence all grids needs to be cleared - */ - if (unplaced_blk_types_index.size() == device_ctx.logical_block_types.size() - 1) { - clear_all_block_types = true; - } - - /* We'll use the grid to record where everything goes. Initialize to the grid has no - * blocks placed anywhere. 
- */ - for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { - for (int i = 0; i < (int)device_ctx.grid.width(); i++) { - for (int j = 0; j < (int)device_ctx.grid.height(); j++) { - const t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); - int itype = type->index; - if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { - grid_blocks.set_usage({i, j, layer_num}, 0); - for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { - grid_blocks.set_block_at_location({i, j, k, layer_num}, ClusterBlockId::INVALID()); - } - } - } - } - } - - /* Similarly, mark all blocks as not being placed yet. */ - for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - int blk_type = cluster_ctx.clb_nlist.block_type(blk_id)->index; - if (clear_all_block_types || unplaced_blk_types_index.count(blk_type)) { - block_locs[blk_id].loc = t_pl_loc(); - } - } -} - -void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry) { - auto& device_ctx = g_vpr_ctx.device(); - - std::unordered_set blk_types_to_be_cleared; - const auto& logical_block_types = device_ctx.logical_block_types; - - // Insert all the logical block types into the set except the empty type - // clear_block_type_grid_locs does not expect empty type to be among given types - for (const t_logical_block_type& logical_type : logical_block_types) { - if (!is_empty_type(&logical_type)) { - blk_types_to_be_cleared.insert(logical_type.index); - } - } - - clear_block_type_grid_locs(blk_types_to_be_cleared, blk_loc_registry); -} - bool place_one_block(const ClusterBlockId blk_id, enum e_pad_loc_type pad_loc_type, std::vector* blk_types_empty_locs_in_grid, @@ -1222,14 +1150,15 @@ static void alloc_and_load_movable_blocks(const vtr::vector_map& noc_cost_handler) { vtr::ScopedStartFinishTimer timer("Initial Placement"); auto& block_locs = blk_loc_registry.mutable_block_locs(); /* Initialize the grid blocks to empty. 
* Initialize all the blocks to unplaced. */ - clear_all_grid_locs(blk_loc_registry); + blk_loc_registry.clear_all_grid_locs(); /* Go through cluster blocks to calculate the tightest placement * floorplan constraint for each constrained block @@ -1257,7 +1186,7 @@ void initial_placement(const t_placer_opts& placer_opts, } else { if (noc_opts.noc) { // NoC routers are placed before other blocks - initial_noc_placement(noc_opts, placer_opts, blk_loc_registry); + initial_noc_placement(noc_opts, placer_opts, blk_loc_registry, noc_cost_handler.value()); propagate_place_constraints(); } diff --git a/vpr/src/place/initial_placement.h b/vpr/src/place/initial_placement.h index 6cc391335e2..567978806cb 100644 --- a/vpr/src/place/initial_placement.h +++ b/vpr/src/place/initial_placement.h @@ -1,6 +1,9 @@ #ifndef VPR_INITIAL_PLACEMENT_H #define VPR_INITIAL_PLACEMENT_H +class NocCostHandler; + +#include #include "place_macro.h" #include "partition_region.h" @@ -10,9 +13,9 @@ class BlkLocRegistry; -/* The maximum number of tries when trying to place a macro at a * - * random location before trying exhaustive placement - find the first * - * legal position and place it during initial placement. */ +/* The maximum number of tries when trying to place a macro at a + * random location before trying exhaustive placement - find the first + * legal position and place it during initial placement. */ constexpr int MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY = 8; /** @@ -135,7 +138,8 @@ bool is_block_placed(ClusterBlockId blk_id, void initial_placement(const t_placer_opts& placer_opts, const char* constraints_file, const t_noc_opts& noc_opts, - BlkLocRegistry& blk_loc_registry); + BlkLocRegistry& blk_loc_registry, + std::optional& noc_cost_handler); /** * @brief Looks for a valid placement location for block. @@ -155,13 +159,6 @@ bool place_one_block(const ClusterBlockId blk_id, vtr::vector* block_scores, BlkLocRegistry& blk_loc_registry); -/** - * @brief Initializes the grid to empty. 
It also initializes the location for - * all blocks to unplaced. - * - * @param blk_loc_registry Placement block location information. To be filled with the location - * where pl_macro is placed. - */ -void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry); + #endif diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 5abd9cb4f86..07058f1374f 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1682,66 +1682,32 @@ void NetCostHandler::reset_move_nets() { } } -void NetCostHandler::recompute_costs_from_scratch(const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, +void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - t_placer_costs* costs) { + t_placer_costs& costs) { auto check_and_print_cost = [](double new_cost, double old_cost, const std::string& cost_name) -> void { - if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) { + if (!vtr::isclose(new_cost, old_cost, PL_INCREMENTAL_COST_TOLERANCE, 0.)) { std::string msg = vtr::string_fmt( "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n", - cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL); + cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, PL_INCREMENTAL_COST_TOLERANCE); VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); } }; double new_bb_cost = recompute_bb_cost_(); - check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost"); - costs->bb_cost = new_bb_cost; + check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost"); + costs.bb_cost = new_bb_cost; if (placer_opts_.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, placer_state_, &new_timing_cost); - check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost"); - costs->timing_cost = new_timing_cost; + check_and_print_cost(new_timing_cost, costs.timing_cost, "timing_cost"); + 
costs.timing_cost = new_timing_cost; } else { VTR_ASSERT(placer_opts_.place_algorithm == BOUNDING_BOX_PLACE); - costs->cost = new_bb_cost * costs->bb_cost_norm; - } - - if (noc_opts.noc) { - NocCostTerms new_noc_cost; - recompute_noc_costs(new_noc_cost); - - check_and_print_cost(new_noc_cost.aggregate_bandwidth, - costs->noc_cost_terms.aggregate_bandwidth, - "noc_aggregate_bandwidth"); - costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth; - - // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. - // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) - if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) { - check_and_print_cost(new_noc_cost.latency, - costs->noc_cost_terms.latency, - "noc_latency_cost"); - } - costs->noc_cost_terms.latency = new_noc_cost.latency; - - if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { - check_and_print_cost(new_noc_cost.latency_overrun, - costs->noc_cost_terms.latency_overrun, - "noc_latency_overrun_cost"); - } - costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun; - - if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { - check_and_print_cost(new_noc_cost.congestion, - costs->noc_cost_terms.congestion, - "noc_congestion_cost"); - } - costs->noc_cost_terms.congestion = new_noc_cost.congestion; + costs.cost = new_bb_cost * costs.bb_cost_norm; } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 3b35bf68ea3..0c34ca34176 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -15,12 +15,6 @@ class PlacerState; -/** - * @brief The error tolerance due to round off for the total cost computation. - * When we check it from scratch vs. incrementally. 0.01 means that there is a 1% error tolerance. 
- */ -constexpr double ERROR_TOL = .01; - /** * @brief The method used to calculate placement cost * @details For comp_cost. NORMAL means use the method that generates updatable bounding boxes for speed. @@ -39,8 +33,8 @@ class NetCostHandler { public: NetCostHandler() = delete; NetCostHandler(const NetCostHandler&) = delete; - NetCostHandler(NetCostHandler&&) = delete; NetCostHandler& operator=(const NetCostHandler&) = delete; + NetCostHandler(NetCostHandler&&) = delete; NetCostHandler& operator=(NetCostHandler&&) = delete; /** @@ -121,10 +115,9 @@ class NetCostHandler { * Used to computed timing cost . * @param costs passed by reference and computed by this routine (i.e. returned by reference) */ - void recompute_costs_from_scratch(const t_noc_opts& noc_opts, - const PlaceDelayModel* delay_model, + void recompute_costs_from_scratch(const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - t_placer_costs* costs); + t_placer_costs& costs); private: ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. 
diff --git a/vpr/src/place/noc_place_checkpoint.cpp b/vpr/src/place/noc_place_checkpoint.cpp index b23cc4cf4ed..0cb0f0134c1 100644 --- a/vpr/src/place/noc_place_checkpoint.cpp +++ b/vpr/src/place/noc_place_checkpoint.cpp @@ -2,8 +2,9 @@ #include "noc_place_checkpoint.h" #include "noc_place_utils.h" -NoCPlacementCheckpoint::NoCPlacementCheckpoint() - : valid_(false) +NoCPlacementCheckpoint::NoCPlacementCheckpoint(NocCostHandler& noc_cost_handler) + : noc_cost_handler_(noc_cost_handler) + , valid_(false) , cost_(std::numeric_limits::infinity()) { const auto& noc_ctx = g_vpr_ctx.noc(); @@ -36,7 +37,6 @@ void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs, const auto& noc_ctx = g_vpr_ctx.noc(); const auto& device_ctx = g_vpr_ctx.device(); GridBlock& grid_blocks = blk_loc_registry.mutable_grid_blocks(); - const auto& block_locs = blk_loc_registry.block_locs(); // Get all physical routers const auto& noc_phy_routers = noc_ctx.noc_model.get_noc_routers(); @@ -64,7 +64,8 @@ void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs, } // Re-initialize routes and static variables that keep track of NoC-related costs - reinitialize_noc_routing(costs, {}, block_locs); + VTR_ASSERT(noc_cost_handler_.points_to_same_block_locs(blk_loc_registry.block_locs())); + noc_cost_handler_.reinitialize_noc_routing(costs, {}); } bool NoCPlacementCheckpoint::is_valid() const { diff --git a/vpr/src/place/noc_place_checkpoint.h b/vpr/src/place/noc_place_checkpoint.h index 3eb631c273e..e794e3e2d65 100644 --- a/vpr/src/place/noc_place_checkpoint.h +++ b/vpr/src/place/noc_place_checkpoint.h @@ -1,6 +1,8 @@ #ifndef VTR_ROUTERPLACEMENTCHECKPOINT_H #define VTR_ROUTERPLACEMENTCHECKPOINT_H +class NocCostHandler; + /** * @brief NoC router placement checkpoint * @@ -29,7 +31,7 @@ class NoCPlacementCheckpoint { /** * @brief Default constructor initializes private member variables. 
*/ - NoCPlacementCheckpoint(); + explicit NoCPlacementCheckpoint(NocCostHandler& noc_cost_handler); NoCPlacementCheckpoint(const NoCPlacementCheckpoint& other) = delete; NoCPlacementCheckpoint& operator=(const NoCPlacementCheckpoint& other) = delete; @@ -67,6 +69,7 @@ class NoCPlacementCheckpoint { private: std::unordered_map router_locations_; + NocCostHandler& noc_cost_handler_; bool valid_ = false; double cost_; }; diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp index 0b4a10fee1c..3d67588a532 100644 --- a/vpr/src/place/noc_place_utils.cpp +++ b/vpr/src/place/noc_place_utils.cpp @@ -5,6 +5,7 @@ #include "vtr_log.h" #include "vtr_assert.h" #include "vtr_random.h" +#include "vtr_math.h" #include "channel_dependency_graph.h" #include "noc_routing_algorithm_creator.h" @@ -17,20 +18,8 @@ #endif #include +#include -/********************** Variables local to noc_place_utils.c pp***************************/ -/* Proposed and actual cost of a noc traffic flow used for each move assessment */ -static vtr::vector traffic_flow_costs, proposed_traffic_flow_costs; - -/* Keeps track of traffic flows that have been updated at each attempted placement move*/ -static std::vector affected_traffic_flows; - -/* Proposed and actual congestion cost of a NoC link used for each move assessment */ -static vtr::vector link_congestion_costs, proposed_link_congestion_costs; - -/* Keeps track of NoC links whose bandwidth usage have been updated at each attempted placement move*/ -static std::unordered_set affected_noc_links; -/*********************************************************** *****************************/ /** * @brief Randomly select a movable NoC router cluster blocks @@ -59,13 +48,35 @@ static bool select_random_router_cluster(ClusterBlockId& b_from, static std::vector find_affected_links_by_flow_reroute(std::vector& prev_links, std::vector& curr_links); -void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes, - const 
vtr::vector_map& block_locs) { +NocCostHandler::NocCostHandler(const vtr::vector_map& block_locs) + : block_locs_ref(block_locs) { + const auto& noc_ctx = g_vpr_ctx.noc(); + + int number_of_traffic_flows = noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows(); + + traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); + proposed_traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); + + traffic_flow_routes.resize(number_of_traffic_flows, {}); + traffic_flow_routes_backup.resize(number_of_traffic_flows, {}); + + int number_of_noc_links = noc_ctx.noc_model.get_number_of_noc_links(); + + link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); + proposed_link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); + + link_bandwidth_usages.resize(number_of_noc_links, 0.0); +} + +bool NocCostHandler::points_to_same_block_locs(const vtr::vector_map& block_locs) const { + return std::addressof(block_locs_ref) == std::addressof(block_locs); +} + +void NocCostHandler::initial_noc_routing(const vtr::vector>& new_traffic_flow_routes) { // need to update the link usages within after routing all the traffic flows // also need to route all the traffic flows and store them - auto& noc_ctx = g_vpr_ctx.mutable_noc(); - - NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; + const auto& noc_ctx = g_vpr_ctx.noc(); + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; VTR_ASSERT(new_traffic_flow_routes.size() == (size_t)noc_traffic_flows_storage.get_number_of_traffic_flows() || new_traffic_flow_routes.empty()); @@ -83,34 +94,31 @@ void initial_noc_routing(const vtr::vector& curr_traffic_flow_route = new_traffic_flow_routes.empty() - ? route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router, block_locs) + ? 
route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router) : new_traffic_flow_routes[traffic_flow_id]; if (!new_traffic_flow_routes.empty()) { - noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id) = curr_traffic_flow_route; + traffic_flow_routes[traffic_flow_id] = curr_traffic_flow_route; } // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed - update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth); + update_traffic_flow_link_usage(curr_traffic_flow_route, 1, curr_traffic_flow.traffic_flow_bandwidth); } } -void reinitialize_noc_routing(t_placer_costs& costs, - const vtr::vector>& new_traffic_flow_routes, - const vtr::vector_map& block_locs) { +void NocCostHandler::reinitialize_noc_routing(t_placer_costs& costs, + const vtr::vector>& new_traffic_flow_routes) { // used to access NoC links and modify them - auto& noc_ctx = g_vpr_ctx.mutable_noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); VTR_ASSERT((size_t)noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows() == new_traffic_flow_routes.size() || new_traffic_flow_routes.empty()); // Zero out bandwidth usage for all links - for (auto& noc_link : noc_ctx.noc_model.get_mutable_noc_links()) { - noc_link.set_bandwidth_usage(0.0); - } + std::fill(link_bandwidth_usages.begin(), link_bandwidth_usages.end(), 0.0); // Route traffic flows and update link bandwidth usage - initial_noc_routing(new_traffic_flow_routes, block_locs); + initial_noc_routing(new_traffic_flow_routes); // Initialize traffic_flow_costs costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); @@ -118,9 +126,8 @@ void reinitialize_noc_routing(t_placer_costs& costs, costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); } -void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& 
blocks_affected, - NocCostTerms& delta_c, - const vtr::vector_map& block_locs) { +void NocCostHandler::find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, + NocCostTerms& delta_c) { /* For speed, delta_c is passed by reference instead of being returned. * We expect delta cost terms to be zero to ensure correctness. */ @@ -128,9 +135,9 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move VTR_ASSERT_SAFE(delta_c.latency == 0.); VTR_ASSERT_SAFE(delta_c.latency_overrun == 0.); VTR_ASSERT_SAFE(delta_c.congestion == 0.); - auto& noc_ctx = g_vpr_ctx.mutable_noc(); - NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; + const auto& noc_ctx = g_vpr_ctx.noc(); + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; // keeps track of traffic flows that have been re-routed // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice. 
@@ -146,14 +153,14 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move // check if the current moved block is a noc router if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) { // current block is a router, so re-route all the traffic flows it is a part of - re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows, block_locs); + re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows); } } // go through all the affected traffic flows and calculate their new costs after being re-routed, then determine the change in cost before the traffic flows were modified - for (auto& traffic_flow_id : affected_traffic_flows) { + for (NocTrafficFlowId traffic_flow_id : affected_traffic_flows) { // get the traffic flow route - const std::vector& traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + const std::vector& traffic_flow_route = traffic_flow_routes[traffic_flow_id]; // get the current traffic flow info const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); @@ -172,19 +179,19 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move // Iterate over all affected links and calculate their new congestion cost and store it for (const NocLink& link : noc_ctx.noc_model.get_noc_links(affected_noc_links)) { // calculate the new congestion cost for the link and store it - proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link); + proposed_link_congestion_costs[link] = get_link_congestion_cost(link); // compute how much the congestion cost changes with this swap delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link]; } } -void commit_noc_costs() { +void NocCostHandler::commit_noc_costs() { // used 
to access NoC links - auto& noc_ctx = g_vpr_ctx.mutable_noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); // Iterate over all the traffic flows affected by the proposed router swap - for (auto& traffic_flow_id : affected_traffic_flows) { + for (NocTrafficFlowId traffic_flow_id : affected_traffic_flows) { // update the traffic flow costs traffic_flow_costs[traffic_flow_id] = proposed_traffic_flow_costs[traffic_flow_id]; @@ -204,11 +211,10 @@ void commit_noc_costs() { } } -std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, - const NocStorage& noc_model, - NocTrafficFlows& noc_traffic_flows_storage, - NocRouting& noc_flows_router, - const vtr::vector_map& block_locs) { +std::vector& NocCostHandler::route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocStorage& noc_model, + const NocTrafficFlows& noc_traffic_flows_storage, + NocRouting& noc_flows_router) { // get the traffic flow with the current id const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); @@ -217,77 +223,71 @@ std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id; // get the ids of the hard router blocks where the logical router cluster blocks have been placed - NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_source_router_block_id].loc); - NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_sink_router_block_id].loc); + NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(block_locs_ref[logical_source_router_block_id].loc); + NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs_ref[logical_sink_router_block_id].loc); // route the current traffic flow - std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id); + std::vector& 
curr_traffic_flow_route = traffic_flow_routes[traffic_flow_id]; noc_flows_router.route_flow(source_router_block_id, sink_router_block_id, traffic_flow_id, curr_traffic_flow_route, noc_model); return curr_traffic_flow_route; } -void update_traffic_flow_link_usage(const std::vector& traffic_flow_route, - NocStorage& noc_model, - int inc_or_dec, - double traffic_flow_bandwidth) { +void NocCostHandler::update_traffic_flow_link_usage(const std::vector& traffic_flow_route, + int inc_or_dec, + double traffic_flow_bandwidth) { // go through the links within the traffic flow route and update their bandwidth usage - for (auto& link_in_route_id : traffic_flow_route) { + for (const NocLinkId link_in_route_id : traffic_flow_route) { // get the link to update and its current bandwidth - NocLink& curr_link = noc_model.get_single_mutable_noc_link(link_in_route_id); - double curr_link_bandwidth = curr_link.get_bandwidth_usage(); + double curr_link_bandwidth_usage = link_bandwidth_usages[link_in_route_id]; - curr_link.set_bandwidth_usage(curr_link_bandwidth + inc_or_dec * traffic_flow_bandwidth); + link_bandwidth_usages[link_in_route_id] = curr_link_bandwidth_usage + inc_or_dec * traffic_flow_bandwidth; // check that the bandwidth never goes to negative - VTR_ASSERT(curr_link.get_bandwidth_usage() >= 0.0); + VTR_ASSERT_SAFE(link_bandwidth_usages[link_in_route_id] >= 0.0); } } -void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, - NocTrafficFlows& noc_traffic_flows_storage, - NocStorage& noc_model, - NocRouting& noc_flows_router, - std::unordered_set& updated_traffic_flows, - const vtr::vector_map& block_locs) { +void NocCostHandler::re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, + const NocTrafficFlows& noc_traffic_flows_storage, + const NocStorage& noc_model, + NocRouting& noc_flows_router, + std::unordered_set& updated_traffic_flows) { // get all the associated traffic flows for the logical router cluster block const auto& 
assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id); // if there are traffic flows associated to the current router block, process them - for (auto traffic_flow_id : assoc_traffic_flows) { + for (NocTrafficFlowId traffic_flow_id : assoc_traffic_flows) { // first check to see whether we have already re-routed the current traffic flow and only re-route it if we haven't already. if (updated_traffic_flows.find(traffic_flow_id) == updated_traffic_flows.end()) { - // get all links for this flow route before it is rerouted - // The returned const std::vector& is copied so that we can modify (sort) it - std::vector prev_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + // copy all links for this flow route before it is rerouted + std::vector prev_traffic_flow_links = traffic_flow_routes[traffic_flow_id]; // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers - re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router, block_locs); + re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router); // now make sure we don't update this traffic flow a second time by adding it to the group of updated traffic flows updated_traffic_flows.insert(traffic_flow_id); // get all links for this flow route after it is rerouted - std::vector curr_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + std::vector curr_traffic_flow_links = traffic_flow_routes[traffic_flow_id]; // find links that appear in the old route or the new one, but not both of them // these are the links whose bandwidth utilization is affected by rerouting - auto unique_links = find_affected_links_by_flow_reroute(prev_traffic_flow_links, curr_traffic_flow_links); + std::vector unique_links = find_affected_links_by_flow_reroute(prev_traffic_flow_links, 
curr_traffic_flow_links); // update the static data structure to remember which links were affected by router swap affected_noc_links.insert(unique_links.begin(), unique_links.end()); - // update global datastructures to indicate that the current traffic flow was affected due to router cluster blocks being swapped + // update global data structures to indicate that the current traffic flow was affected due to router cluster blocks being swapped affected_traffic_flows.push_back(traffic_flow_id); } } } -void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected, - const vtr::vector_map& block_locs) { - auto& noc_ctx = g_vpr_ctx.mutable_noc(); - - NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; +void NocCostHandler::revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected) { + const auto& noc_ctx = g_vpr_ctx.noc(); + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; // keeps track of traffic flows that have been reverted // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice. @@ -305,11 +305,18 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect const std::vector& assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(blk); // if there are traffic flows associated to the current router block, process them - for (auto traffic_flow_id : assoc_traffic_flows) { + for (NocTrafficFlowId traffic_flow_id : assoc_traffic_flows) { // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already. 
if (reverted_traffic_flows.find(traffic_flow_id) == reverted_traffic_flows.end()) { - // Revert the traffic flow route by re-routing it - re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, block_locs); + const t_noc_traffic_flow& traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); + + // decrease the bandwidth utilization of the links in the current route + update_traffic_flow_link_usage(traffic_flow_routes[traffic_flow_id], -1, traffic_flow.traffic_flow_bandwidth); + // increase the bandwidth utilization of the links in the backup route + update_traffic_flow_link_usage(traffic_flow_routes_backup[traffic_flow_id], +1, traffic_flow.traffic_flow_bandwidth); + + // Revert the traffic flow route by restoring the backup + std::swap(traffic_flow_routes[traffic_flow_id], traffic_flow_routes_backup[traffic_flow_id]); // make sure we do not revert this traffic flow again reverted_traffic_flows.insert(traffic_flow_id); @@ -319,35 +326,37 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect } } -void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, - NocTrafficFlows& noc_traffic_flows_storage, - NocStorage& noc_model, - NocRouting& noc_flows_router, - const vtr::vector_map& block_locs) { +void NocCostHandler::re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocTrafficFlows& noc_traffic_flows_storage, + const NocStorage& noc_model, + NocRouting& noc_flows_router) { // get the current traffic flow info const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - /* since the current traffic flow route will be + /* since the current traffic flow route will be * changed, first we need to decrement the bandwidth * usage of all links that are part of * the existing traffic flow route */ - const std::vector& curr_traffic_flow_route = 
noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); - update_traffic_flow_link_usage(curr_traffic_flow_route, noc_model, -1, curr_traffic_flow.traffic_flow_bandwidth); + const std::vector& curr_traffic_flow_route = traffic_flow_routes[traffic_flow_id]; + update_traffic_flow_link_usage(curr_traffic_flow_route, -1, curr_traffic_flow.traffic_flow_bandwidth); + + // move the current route to a backup container in case it needs to be reverted + std::swap(traffic_flow_routes[traffic_flow_id], traffic_flow_routes_backup[traffic_flow_id]); // now get the re-routed traffic flow route and increment all the link usages with this reverted route - std::vector& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router, block_locs); - update_traffic_flow_link_usage(re_routed_traffic_flow_route, noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth); + std::vector& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router); + update_traffic_flow_link_usage(re_routed_traffic_flow_route, 1, curr_traffic_flow.traffic_flow_bandwidth); } -void recompute_noc_costs(NocCostTerms& new_cost) { - auto& noc_ctx = g_vpr_ctx.noc(); +NocCostTerms NocCostHandler::recompute_noc_costs() const { + const auto& noc_ctx = g_vpr_ctx.noc(); // reset the cost variables first - new_cost = NocCostTerms{0.0, 0.0, 0.0, 0.0}; + NocCostTerms new_cost = NocCostTerms{0.0, 0.0, 0.0, 0.0}; // go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC - for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) { + for (const NocTrafficFlowId traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) { new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; new_cost.latency += traffic_flow_costs[traffic_flow_id].latency; 
new_cost.latency_overrun += traffic_flow_costs[traffic_flow_id].latency_overrun; @@ -357,9 +366,56 @@ void recompute_noc_costs(NocCostTerms& new_cost) { for (auto& link_id : noc_ctx.noc_model.get_noc_links()) { new_cost.congestion += link_congestion_costs[link_id]; } + + return new_cost; } -void update_noc_normalization_factors(t_placer_costs& costs) { +void NocCostHandler::recompute_costs_from_scratch(const t_noc_opts& noc_opts, t_placer_costs& costs) const { + auto check_and_print_cost = [](double new_cost, + double old_cost, + const std::string& cost_name) -> void { + if (!vtr::isclose(new_cost, old_cost, PL_INCREMENTAL_COST_TOLERANCE, 0.)) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n", + cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, PL_INCREMENTAL_COST_TOLERANCE); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + }; + + if (noc_opts.noc) { + NocCostTerms new_noc_cost = recompute_noc_costs(); + + check_and_print_cost(new_noc_cost.aggregate_bandwidth, + costs.noc_cost_terms.aggregate_bandwidth, + "noc_aggregate_bandwidth"); + costs.noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth; + + // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. 
+ // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) + if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency, + costs.noc_cost_terms.latency, + "noc_latency_cost"); + } + costs.noc_cost_terms.latency = new_noc_cost.latency; + + if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency_overrun, + costs.noc_cost_terms.latency_overrun, + "noc_latency_overrun_cost"); + } + costs.noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun; + + if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { + check_and_print_cost(new_noc_cost.congestion, + costs.noc_cost_terms.congestion, + "noc_congestion_cost"); + } + costs.noc_cost_terms.congestion = new_noc_cost.congestion; + } +} + +void NocCostHandler::update_noc_normalization_factors(t_placer_costs& costs) { //Prevent the norm factors from going to infinity costs.noc_cost_norm_factors.aggregate_bandwidth = std::min(1 / costs.noc_cost_terms.aggregate_bandwidth, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST); costs.noc_cost_norm_factors.latency = std::min(1 / costs.noc_cost_terms.latency, MAX_INV_NOC_LATENCY_COST); @@ -381,10 +437,10 @@ void update_noc_normalization_factors(t_placer_costs& costs) { } } -double comp_noc_aggregate_bandwidth_cost() { +double NocCostHandler::comp_noc_aggregate_bandwidth_cost() { // used to get traffic flow route information - auto& noc_ctx = g_vpr_ctx.noc(); - // datastructure that stores all the traffic flow routes + const auto& noc_ctx = g_vpr_ctx.noc(); + // data structure that stores all the traffic flow routes const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; double noc_aggregate_bandwidth_cost = 0.; @@ -393,11 +449,11 @@ double comp_noc_aggregate_bandwidth_cost() { // aggregate bandwidth. Then store this in local data structures and accumulate it. 
for (const auto& traffic_flow_id : g_vpr_ctx.noc().noc_traffic_flows_storage.get_all_traffic_flow_id()) { const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + const std::vector& curr_traffic_flow_route = traffic_flow_routes[traffic_flow_id]; double curr_traffic_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(curr_traffic_flow_route, curr_traffic_flow); - // store the calculated aggregate bandwidth for the current traffic flow in local datastructures (this also initializes them) + // store the calculated aggregate bandwidth for the current traffic flow in local data structures (this also initializes them) traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = curr_traffic_flow_aggregate_bandwidth_cost; // accumulate the aggregate bandwidth cost @@ -407,10 +463,10 @@ double comp_noc_aggregate_bandwidth_cost() { return noc_aggregate_bandwidth_cost; } -std::pair comp_noc_latency_cost() { +std::pair NocCostHandler::comp_noc_latency_cost() { // used to get traffic flow route information - auto& noc_ctx = g_vpr_ctx.noc(); - // datastructure that stores all the traffic flow routes + const auto& noc_ctx = g_vpr_ctx.noc(); + // data structure that stores all the traffic flow routes const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; std::pair noc_latency_cost_terms{0.0, 0.0}; @@ -419,11 +475,11 @@ std::pair comp_noc_latency_cost() { // latency. Then store this in local data structures and accumulate it. 
for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) { const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + const std::vector& curr_traffic_flow_route = traffic_flow_routes[traffic_flow_id]; auto [curr_traffic_flow_latency, curr_traffic_flow_latency_overrun] = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow); - // store the calculated latency cost terms for the current traffic flow in local datastructures (this also initializes them) + // store the calculated latency cost terms for the current traffic flow in local data structures (this also initializes them) traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency; traffic_flow_costs[traffic_flow_id].latency_overrun = curr_traffic_flow_latency_overrun; @@ -435,15 +491,15 @@ std::pair comp_noc_latency_cost() { return noc_latency_cost_terms; } -double comp_noc_congestion_cost() { +double NocCostHandler::comp_noc_congestion_cost() { // Used to access NoC links - auto& noc_ctx = g_vpr_ctx.noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); double congestion_cost = 0.; // Iterate over all NoC links - for (const auto& link : noc_ctx.noc_model.get_noc_links()) { - double link_congestion_cost = calculate_link_congestion_cost(link); + for (const NocLink& link : noc_ctx.noc_model.get_noc_links()) { + double link_congestion_cost = get_link_congestion_cost(link); // store the congestion cost for this link in static data structures (this also initializes them) link_congestion_costs[link] = link_congestion_cost; @@ -455,30 +511,23 @@ double comp_noc_congestion_cost() { return congestion_cost; } -int check_noc_placement_costs(const t_placer_costs& costs, - double error_tolerance, - const t_noc_opts& noc_opts, - const vtr::vector_map& block_locs) { 
+int NocCostHandler::check_noc_placement_costs(const t_placer_costs& costs, + double error_tolerance, + const t_noc_opts& noc_opts) const { int error = 0; - NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0};; + NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0}; auto& noc_ctx = g_vpr_ctx.noc(); const NocStorage& noc_model = noc_ctx.noc_model; const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; // a copy of NoC link storage used to calculate link bandwidth utilization from scratch - vtr::vector temp_noc_link_storage = noc_model.get_noc_links(); - - // reset bandwidth utilization for all links - std::for_each(temp_noc_link_storage.begin(), temp_noc_link_storage.end(), [](NocLink& link) { link.set_bandwidth_usage(0.0); }); + vtr::vector temp_noc_link_bw_usage(noc_model.get_number_of_noc_links(), 0.0); // need to create a temporary noc routing algorithm std::unique_ptr temp_noc_routing_algorithm = NocRoutingAlgorithmCreator::create_routing_algorithm(noc_opts.noc_routing_algorithm, noc_model); - // stores a temporarily found route for a traffic flow - std::vector temp_found_noc_route; - // go through all the traffic flows and find a route for them based on where the routers are placed within the NoC for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { // get the traffic flow with the current id @@ -489,8 +538,11 @@ int check_noc_placement_costs(const t_placer_costs& costs, ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id; // get the ids of the hard router blocks where the logical router cluster blocks have been placed - NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_source_router_block_id].loc); - NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_sink_router_block_id].loc); + NocRouterId source_router_block_id = 
noc_model.get_router_at_grid_location(block_locs_ref[logical_source_router_block_id].loc); + NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs_ref[logical_sink_router_block_id].loc); + + // stores a temporarily found route for a traffic flow + std::vector temp_found_noc_route; // route the current traffic flow temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, traffic_flow_id, temp_found_noc_route, noc_model); @@ -505,19 +557,16 @@ int check_noc_placement_costs(const t_placer_costs& costs, // increase bandwidth utilization for the links that constitute the current flow's route for (NocLinkId link_id : temp_found_noc_route) { - auto& link = temp_noc_link_storage[link_id]; - double curr_link_bw_util = link.get_bandwidth_usage(); - link.set_bandwidth_usage(curr_link_bw_util + curr_traffic_flow.traffic_flow_bandwidth); - VTR_ASSERT(link.get_bandwidth_usage() >= 0.0); + temp_noc_link_bw_usage[link_id] += curr_traffic_flow.traffic_flow_bandwidth; + VTR_ASSERT(temp_noc_link_bw_usage[link_id] >= 0.0); } - - // clear the current traffic flow route, so we can route the next traffic flow - temp_found_noc_route.clear(); } // Iterate over all NoC links and accumulate congestion cost - for (const auto& link : temp_noc_link_storage) { - cost_check.congestion += calculate_link_congestion_cost(link); + for (const NocLink& link : noc_model.get_noc_links()) { + double link_bw = link.get_bandwidth(); + double used_link_bw = temp_noc_link_bw_usage[link.get_link_id()]; + cost_check.congestion += std::max(used_link_bw - link_bw, 0.) 
/ link_bw; } // check whether the aggregate bandwidth placement cost is within the error tolerance @@ -587,7 +636,7 @@ std::pair calculate_traffic_flow_latency_cost(const std::vector< noc_link_latency_component = noc_link_latency * num_of_links_in_traffic_flow; } - double noc_router_latency_component = 0.0; + double noc_router_latency_component; if (noc_model.get_detailed_router_latency()) { NocLinkId first_noc_link_id = traffic_flow_route[0]; @@ -624,8 +673,12 @@ std::pair calculate_traffic_flow_latency_cost(const std::vector< return {latency, latency_overrun}; } -double calculate_link_congestion_cost(const NocLink& link) { - double congested_bw_ratio = link.get_congested_bandwidth_ratio(); +double NocCostHandler::get_link_congestion_cost(const NocLink& link) const { + double bandwidth = link.get_bandwidth(); + double bandwidth_usage = link_bandwidth_usages[link]; + + double congested_bandwidth = std::max(bandwidth_usage - bandwidth, 0.0); + double congested_bw_ratio = congested_bandwidth / bandwidth; return congested_bw_ratio; } @@ -675,18 +728,18 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, return cost; } -int get_number_of_traffic_flows_with_latency_cons_met() { +int NocCostHandler::get_number_of_traffic_flows_with_latency_cons_met() const { // used to get traffic flow route information - auto& noc_ctx = g_vpr_ctx.mutable_noc(); - // datastructure that stores all the traffic flow routes + const auto& noc_ctx = g_vpr_ctx.noc(); + // data structure that stores all the traffic flow routes const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; int count_of_achieved_latency_cons = 0; // now go through each traffic flow route and check if its latency constraint was met - for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { + for (NocTrafficFlowId traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { const t_noc_traffic_flow& curr_traffic_flow = 
noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + const std::vector& curr_traffic_flow_route = traffic_flow_routes[traffic_flow_id]; // there will always be one more router than links in a traffic flow int num_of_links_in_traffic_flow = curr_traffic_flow_route.size(); @@ -709,15 +762,15 @@ int get_number_of_traffic_flows_with_latency_cons_met() { return count_of_achieved_latency_cons; } -int get_number_of_congested_noc_links() { +int NocCostHandler::get_number_of_congested_noc_links() const { // get NoC links - auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); + const auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); int num_congested_links = 0; // Iterate over all NoC links and count the congested ones for (const auto& link : noc_links) { - double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + double congested_bw_ratio = get_link_congestion_cost(link); if (congested_bw_ratio > MIN_EXPECTED_NOC_CONGESTION_COST) { num_congested_links++; @@ -727,30 +780,38 @@ int get_number_of_congested_noc_links() { return num_congested_links; } -double get_total_congestion_bandwidth_ratio() { +double NocCostHandler::get_total_congestion_bandwidth_ratio() const { // get NoC links - auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); + const auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); double accum_congestion_ratio = 0.0; // Iterate over all NoC links and count the congested ones for (const auto& link : noc_links) { - double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + double congested_bw_ratio = get_link_congestion_cost(link); accum_congestion_ratio += congested_bw_ratio; } return accum_congestion_ratio; } -std::vector get_top_n_congested_links(int n) { +double NocCostHandler::get_link_used_bandwidth(NocLinkId link_id) const { + return link_bandwidth_usages[link_id]; +} + 
+const vtr::vector& NocCostHandler::get_link_bandwidth_usages() const { + return link_bandwidth_usages; +} + +std::vector NocCostHandler::get_top_n_congested_links(int n) { // get NoC links vtr::vector noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); // Sort links based on their congested bandwidth ration in descending order // stable_sort is used to make sure the order is the same across different machines/compilers // Note that when the vector is sorted, indexing it with NocLinkId does return the corresponding link - std::stable_sort(noc_links.begin(), noc_links.end(), [](const NocLink& l1, const NocLink& l2) { - return l1.get_congested_bandwidth_ratio() > l2.get_congested_bandwidth_ratio(); + std::stable_sort(noc_links.begin(), noc_links.end(), [this](const NocLink& l1, const NocLink& l2) { + return link_bandwidth_usages[l1] > link_bandwidth_usages[l2]; }); int pick_n = std::min((int)noc_links.size(), n); @@ -758,30 +819,6 @@ std::vector get_top_n_congested_links(int n) { return std::vector{noc_links.begin(), noc_links.begin() + pick_n}; } -void allocate_and_load_noc_placement_structs() { - auto& noc_ctx = g_vpr_ctx.noc(); - - int number_of_traffic_flows = noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows(); - - traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); - proposed_traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); - - int number_of_noc_links = noc_ctx.noc_model.get_number_of_noc_links(); - - link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); - proposed_link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); -} - -void free_noc_placement_structs() { - vtr::release_memory(traffic_flow_costs); - vtr::release_memory(proposed_traffic_flow_costs); - vtr::release_memory(affected_traffic_flows); - - vtr::release_memory(link_congestion_costs); - vtr::release_memory(proposed_link_congestion_costs); - 
vtr::release_memory(affected_noc_links); -} - /* Below are functions related to the feature that forces to the placer to swap router blocks for a certain percentage of the total number of swaps */ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) { /* A random number between 0-100 is generated here and compared to the user @@ -798,7 +835,7 @@ static bool select_random_router_cluster(ClusterBlockId& b_from, t_logical_block_type_ptr& cluster_from_type, const vtr::vector_map& block_locs) { // need to access all the router cluster blocks in the design - auto& noc_ctx = g_vpr_ctx.noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); auto& cluster_ctx = g_vpr_ctx.clustering(); // get a reference to the collection of router cluster blocks in the design @@ -866,9 +903,9 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, void write_noc_placement_file(const std::string& file_name, const vtr::vector_map& block_locs) { // we need the clustered netlist to get the names of all the NoC router cluster blocks - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); // we need the NoC context to identify the physical router ids based on their locations on the device - auto& noc_ctx = g_vpr_ctx.noc(); + const auto& noc_ctx = g_vpr_ctx.noc(); // file to write the placement information to std::fstream noc_placement_file; @@ -910,13 +947,8 @@ void write_noc_placement_file(const std::string& file_name, noc_placement_file.close(); } -bool noc_routing_has_cycle(const vtr::vector_map& block_locs) { - // used to access traffic flow routes - const auto& noc_ctx = g_vpr_ctx.noc(); - // get all traffic flow routes - const auto& traffic_flow_routes = noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_routes(); - - bool has_cycle = noc_routing_has_cycle(traffic_flow_routes, block_locs); +bool NocCostHandler::noc_routing_has_cycle() const { + bool has_cycle = ::noc_routing_has_cycle(traffic_flow_routes, 
block_locs_ref); return has_cycle; } @@ -956,7 +988,9 @@ void invoke_sat_router(t_placer_costs& costs, const t_noc_opts& noc_opts, int se } #endif -void print_noc_costs(std::string_view header, const t_placer_costs& costs, const t_noc_opts& noc_opts) { +void NocCostHandler::print_noc_costs(std::string_view header, + const t_placer_costs& costs, + const t_noc_opts& noc_opts) const { VTR_LOG("%s. " "cost: %g, " "aggregate_bandwidth_cost: %g, " @@ -977,6 +1011,7 @@ void print_noc_costs(std::string_view header, const t_placer_costs& costs, const get_number_of_congested_noc_links()); } + static std::vector find_affected_links_by_flow_reroute(std::vector& prev_links, std::vector& curr_links) { // Sort both link containers diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h index 99199d7b021..ec56c1c5c87 100644 --- a/vpr/src/place/noc_place_utils.h +++ b/vpr/src/place/noc_place_utils.h @@ -5,374 +5,560 @@ #include "move_utils.h" #include "place_util.h" -// represent the maximum values of the NoC cost normalization factors // -// we need to handle the case where the aggregate bandwidth is 0, so we set this to some arbitrary positive number that is greater than 1.e-9, since that is the range we expect the normalization factor to be (in Gbps) -constexpr double MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST = 1.; -// we expect the latency costs to be in the pico-second range, and we don't expect it to go lower than that. So if the latency costs go below the pico-second range we trim the normalization value to be no higher than 1/ps -// This should be updated if the delays become lower -constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12; -// we don't expect the noc_latency cost to ever go below 1 pico second. -// So this value represents the lowest possible latency cost. 
-constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12; -// the congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity -// We assume that when a link congested, it is overloaded with at least 0.1% of its BW capacity -constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3; -// If a link is overloaded by less than 0.1% of the link bandwidth capacity, -// we assume it is not congested. -constexpr double MIN_EXPECTED_NOC_CONGESTION_COST = 1.e-3; - -constexpr double INVALID_NOC_COST_TERM = -1.0; - -/** - * @brief Each traffic flow cost consists of two components: - * 1) traffic flow aggregate bandwidth (sum over all used links of the traffic flow bandwidth) - * 2) traffic flow latency (currently unloaded/best-case latency of the flow) - * 3) traffic flow latency overrun (how much the latency is higher than the - * latency constraint for a traffic flow. - * NoC placement code will keep an array-of-struct to easily access each - * traffic flow cost. - */ -struct TrafficFlowPlaceCost { - double aggregate_bandwidth = INVALID_NOC_COST_TERM; - double latency = INVALID_NOC_COST_TERM; - double latency_overrun = INVALID_NOC_COST_TERM; -}; - /** - * @brief Initializes the link bandwidth usage for all NoC links. + * @class NocCostHandler is responsible for computing NoC-related costs terms. * - * If traffic flow routes are not passed to this function, it uses a NoC routing algorithm - * to route all traffic flows. The caller can prevent this function from routing traffic flows - * by passing routes for all traffic flows. This should be called after initial placement, - * where all the logical NoC router blocks have been placed for the first time and no traffic - * flows have been routed yet. In this case an empty vector should be passed to the function. - * This function can also be called after modification of traffic flow routes. For example, - * NoC SAT routing algorithm generates new traffic flow routes to avoid congestion. 
The routes - * generate by the SAT router should be passed to this function - * - * - * @param new_traffic_flow_routes Traffic flow routes used to initialize link bandwidth utilization. - * If an empty vector is passed, this function uses a routing algorithm to route traffic flows. - * @param block_locs Contains the location where each clustered block is placed at. - */ -void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes, - const vtr::vector_map& block_locs); - -/** - * @brief Re-initializes all link bandwidth usages by either re-routing - * all traffic flows or using the provided traffic flow routes. This functions - * also initializes static variables in noc_place_utils.cpp that are used to - * keep track of NoC-related costs. - * - * This function should be called when a placement checkpoint is restored. - * If the router placement in the checkpoint is different from the last - * router placement before the checkpoint is restored, link bandwidth usage, - * traffic flow routes, and static variable in noc_place_utils.cpp are no - * longer valid and need to be re-initialized. - * - * This function should be called after NoC SAT routing algorithm returns its - * traffic flow routes. - * - * @param costs Used to get aggregate bandwidth and latency costs. - * @param new_traffic_flow_routes Traffic flow routes used to initialize link bandwidth utilization. - * If an empty vector is passed, this function uses a routing algorithm to route traffic flows. - * @param block_locs Contains the location where each clustered block is placed at. - */ -void reinitialize_noc_routing(t_placer_costs& costs, - const vtr::vector>& new_traffic_flow_routes, - const vtr::vector_map& block_locs); - -/** - * @brief Goes through all the cluster blocks that were moved - * in a single swap iteration during placement and checks to see - * if any moved blocks were NoC routers. 
- * - * For each moved block that is a NoC router, all the traffic flows - * that the router is a part of are re-routed. The individual noc placement - * costs (latency and aggregate bandwidth) are also updated to - * reflect the re-routed traffic flows. This update is done to the - * 'proposed_traffic_flow_aggregate_bandwidth_cost' and - * 'proposed_traffic_flow_latency_cost' datastructures found in - * 'noc_place_utils.cpp'. - * - * Finally the overall change in NoC costs for a given placement - * iteration is computed. - * - * If none of the moved blocks are NoC routers, then this function - * does nothing. - * - * This function should be used if the user enabled NoC optimization - * during placement and after a move has been proposed. - * - * @param blocks_affected Contains all the blocks that were moved in - * the current placement iteration. This includes the cluster ids of - * the moved blocks, their previous locations and their new locations - * after being moved. - * @param noc_aggregate_bandwidth_delta_c The change in the overall - * NoC aggregate bandwidth cost caused by a placer move is stored here. - * @param noc_latency_delta_c The change in the overall - * NoC latency cost caused by a placer move is stored here. - * @param block_locs Contains the location where each clustered block is placed at. - */ -void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, - NocCostTerms& delta_c, - const vtr::vector_map& block_locs); - -/** - * @brief Updates static datastructures found in 'noc_place_utils.cpp' - * which keep track of the aggregate bandwidth and latency costs of all - * traffic flows in the design. - * - * Go through the 'affected_traffic_flows' datastructure which contains - * the traffic flows which have been modified in a given placement - * iteration. 
For each traffic flow update the aggregate bandwidth and - * latency costs within the 'traffic_flow_aggregate_bandwidth_cost' and - * 'traffic_flow_latency_cost' datastructures with their proposed values - * which can be found in the 'proposed_traffic_flow_aggregate_bandwidth_cost' - * and 'proposed_traffic_flow_latency_cost' datastructures. - * - * This function should be used after a proposed move which includes NoC - * router blocks (logical) is accepted. The move needs to be accepted - * since the affected traffic flow costs are updated here to reflect the - * current placement and the placement is only updated when a move is - * accepted. - */ -void commit_noc_costs(); - -/** - * @brief Routes a given traffic flow within the NoC based on where the - * logical cluster blocks in the traffic flow are currently placed. The - * found route is stored and returned externally. - * - * First, the hard routers blocks that represent the placed location of - * the router cluster blocks are identified. Then the traffic flow - * is routed and updated. + * @details Once all NoC routers are placed for the first time, traffic flows between + * them can be routed by calling initial_noc_routing(). Internal data structures used + * to enable incremental cost computation can be initialized by calling + * comp_noc_aggregate_bandwidth_cost(), comp_noc_latency_cost(), + * and comp_noc_congestion_cost(). For incremental cost calculation, + * find_affected_noc_routers_and_update_noc_costs() should be called to compute + * how much NoC-related cost term would change if a few NoC routers are moved around. + * commit_noc_costs() or revert_noc_traffic_flow_routes() should be called to decide whether + * the NoC router swaps are committed or proposed moved needs to be reverted. * - * Note that this function does not update the link bandwidth utilization. - * update_traffic_flow_link_usage() should be called after this function - * to update the link utilization for the new route. 
If the flow is re-routed - * because either its source or destination are moved, update_traffic_flow_link_usage() - * should be used to reduce the bandwidth utilization for the old route. - * - * @param traffic_flow_id Represents the traffic flow that needs to be routed - * @param noc_model Contains all the links and routers within the NoC. Used - * to route traffic flows within the NoC. - * @param noc_traffic_flows_storage Contains all the traffic flow information - * within the NoC. Used to get the current traffic flow information. - * @param noc_flows_router The packet routing algorithm used to route traffic - * flows within the NoC. - * @param block_locs Contains the location where each clustered block is placed at. - * @return std::vector& The found route for the traffic flow. */ -std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, +class NocCostHandler { + public: + /** + * @param block_locs Contains the location where each clustered block is placed at. + */ + explicit NocCostHandler(const vtr::vector_map& block_locs); + + NocCostHandler() = delete; + NocCostHandler(const NocCostHandler&) = delete; + NocCostHandler& operator=(const NocCostHandler&) = delete; + NocCostHandler(NocCostHandler&&) = default; + NocCostHandler& operator=(NocCostHandler&&) = default; + + /** + * @brief Check if the internal reference to block_locs is pointing to the same + * block_locs provided as an argument. + * @param block_locs The provided block_locs whose address is compared with the internal + * reference to block_locs. + * @return True if both the internal reference and the provided argument point to the same + * block_locs object. + */ + bool points_to_same_block_locs(const vtr::vector_map& block_locs) const; + + /** + * @brief Initializes the link bandwidth usage for all NoC links. + * + * If traffic flow routes are not passed to this function, it uses a NoC routing algorithm + * to route all traffic flows. 
The caller can prevent this function from routing traffic flows + * by passing routes for all traffic flows. This should be called after initial placement, + * where all the logical NoC router blocks have been placed for the first time and no traffic + * flows have been routed yet. In this case an empty vector should be passed to the function. + * This function can also be called after modification of traffic flow routes. For example, + * NoC SAT routing algorithm generates new traffic flow routes to avoid congestion. The routes + * generated by the SAT router should be passed to this function. + * + * + * @param new_traffic_flow_routes Traffic flow routes used to initialize link bandwidth utilization. + * If an empty vector is passed, this function uses a routing algorithm to route traffic flows. + */ + void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes); + + /** + * @brief Re-initializes all link bandwidth usages by either re-routing + * all traffic flows or using the provided traffic flow routes. This function + * also initializes static variables in noc_place_utils.cpp that are used to + * keep track of NoC-related costs. + * + * This function should be called when a placement checkpoint is restored. + * If the router placement in the checkpoint is different from the last + * router placement before the checkpoint is restored, link bandwidth usage, + * traffic flow routes, and static variables in noc_place_utils.cpp are no + * longer valid and need to be re-initialized. + * + * This function should be called after NoC SAT routing algorithm returns its + * traffic flow routes. + * + * @param costs Used to get aggregate bandwidth and latency costs. + * @param new_traffic_flow_routes Traffic flow routes used to initialize link bandwidth utilization. + * If an empty vector is passed, this function uses a routing algorithm to route traffic flows.
+ */ + void reinitialize_noc_routing(t_placer_costs& costs, + const vtr::vector>& new_traffic_flow_routes); + + /** + * @brief Goes through all the cluster blocks that were moved + * in a single swap iteration during placement and checks to see + * if any moved blocks were NoC routers. + * + * For each moved block that is a NoC router, all the traffic flows + * that the router is a part of are re-routed. The individual noc placement + * costs (latency and aggregate bandwidth) are also updated to + * reflect the re-routed traffic flows. This update is done to the + * 'proposed_traffic_flow_aggregate_bandwidth_cost' and + * 'proposed_traffic_flow_latency_cost' data structures found in + * 'noc_place_utils.cpp'. + * + * Finally the overall change in NoC costs for a given placement + * iteration is computed. + * + * If none of the moved blocks are NoC routers, then this function + * does nothing. + * + * This function should be used if the user enabled NoC optimization + * during placement and after a move has been proposed. + * + * @param blocks_affected Contains all the blocks that were moved in + * the current placement iteration. This includes the cluster ids of + * the moved blocks, their previous locations and their new locations + * after being moved. + * @param noc_aggregate_bandwidth_delta_c The change in the overall + * NoC aggregate bandwidth cost caused by a placer move is stored here. + * @param noc_latency_delta_c The change in the overall + * NoC latency cost caused by a placer move is stored here. + */ + void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, + NocCostTerms& delta_c); + + /** + * @brief Updates static data structures found in 'noc_place_utils.cpp' + * which keep track of the aggregate bandwidth and latency costs of all + * traffic flows in the design. 
+ * + * Go through the 'affected_traffic_flows' data structure which contains + * the traffic flows which have been modified in a given placement + * iteration. For each traffic flow update the aggregate bandwidth and + * latency costs within the 'traffic_flow_aggregate_bandwidth_cost' and + * 'traffic_flow_latency_cost' data structures with their proposed values + * which can be found in the 'proposed_traffic_flow_aggregate_bandwidth_cost' + * and 'proposed_traffic_flow_latency_cost' data structures. + * + * This function should be used after a proposed move which includes NoC + * router blocks (logical) is accepted. The move needs to be accepted + * since the affected traffic flow costs are updated here to reflect the + * current placement and the placement is only updated when a move is + * accepted. + */ + void commit_noc_costs(); + + /** + * @brief Used to re-route all the traffic flows associated to logical + * router blocks that were supposed to be moved during placement but are + * back to their original positions. + * + * The routing function is called to find the original traffic flow route + * again. + * + * @param blocks_affected Contains all the blocks that were moved in + * the current placement iteration. This includes the cluster ids of + * the moved blocks, their previous locations and their new locations + * after being moved. + */ + void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected); + + /** + * @brief Recompute the NoC costs (aggregate bandwidth and latency) by + * accumulating the individual traffic flow costs and then verify + * whether the result is within an error tolerance of the placements + * NoC costs. + * + * During placement, the NoC aggregate bandwidth and latency costs are + * incrementally updated by adding the change in NoC costs caused by + * each accepted move. 
This function quickly verifies whether + * the incremental changes to the NoC costs are correct by comparing + * the current costs to newly computed costs for the current + * placement state. This function assumes the traffic flows have + * been routed and all the individual NoC costs of all traffic flows are + * correct and only accumulates them to compute the new overall NoC costs. + * + * If the comparison is larger than the error tolerance then it + * implies that the incremental cost updates were incorrect and + * an error is thrown. + * + * This function is not very expensive and can be called regularly during + * placement to ensure the NoC costs do not deviate too far off + * from their correct values. + * + * @return NoC cost terms computed from scratch. + */ + NocCostTerms recompute_noc_costs() const; + + void recompute_costs_from_scratch(const t_noc_opts& noc_opts, + t_placer_costs& costs) const; + + /** + * @brief Updates all the cost normalization factors relevant to the NoC. + * Handles exceptional cases so that the normalization factors do not + * reach INF. + * This is intended to be used to initialize the normalization factors of + * the NoC and also at the outer loop iteration of placement to + * balance the NoC costs with other placement cost parameters. + * + * @param costs Contains the normalization factors which need to be updated + */ + static void update_noc_normalization_factors(t_placer_costs& costs) ; + + /** + * @brief Calculates the aggregate bandwidth of each traffic flow in the NoC + * and initializes local variables that keep track of the traffic flow + * aggregate bandwidth costs. + * Then the total aggregate bandwidth cost is determined by summing up all + * the individual traffic flow aggregate bandwidths. + * + * This should be used after initial placement to determine the starting + * aggregate bandwidth cost of the NoC. + * + * @return double The aggregate bandwidth cost of the NoC.
+ */ + double comp_noc_aggregate_bandwidth_cost(); + + /** + * @brief Calculates the latency cost of each traffic flow in the NoC + * and initializes local variables that keep track of the traffic flow + * latency costs. Then the total latency cost is determined by summing up all + * the individual traffic flow latency costs. + * + * This should be used after initial placement to determine the starting latency + * cost of the NoC. + * + * @return The latency cost and the latency overrun cost of the NoC. + */ + std::pair comp_noc_latency_cost(); + + /** + * @brief Calculates the congestion cost of each link in the NoC and initializes + * member variables that keep track of link congestion costs. Then, the total congestion + * cost is computed by summing up all the individual NoC link congestion costs. + * + * This should be used after initial placement to determine the starting + * congestion cost of the NoC. + * + * @return double The congestion cost of the NoC + */ + double comp_noc_congestion_cost(); + + /** + * @brief Given a placement state the NoC costs are re-computed + * from scratch and compared to the current NoC placement costs. + * This involves finding new routes for all traffic + * flows and then computing the aggregate bandwidth and latency costs + * for the traffic flows using the newly found routes. Then the + * overall NoC costs are computed by accumulating the newly found traffic flow + * costs. + * + * The newly computed NoC costs are compared to the current NoC costs to + * check if they are within an error tolerance. If the comparison is + * larger than the error tolerance then an error is thrown and it indicates + * that the incremental NoC cost updates are incorrect. + * + * This function is similar to 'recompute_noc_costs' but the traffic flow + * routes and costs are computed as well. As a result this function is very + * expensive and should be used in larger intervals (number of moves) within + * the placer.
+ * + * @param costs Contains the current NoC aggregate bandwidth and latency costs + * which are computed incrementally after each logical router move during + * placement. + * @param error_tolerance The maximum allowable difference between the current + * NoC costs and the newly computed NoC costs. + * @param noc_opts Contains information necessary to compute the NoC costs + * from scratch. For example this would include the routing algorithm and + * weights for the different components of the NoC costs. + * @return An integer which represents the status of the comparison. 0 + * indicates that the current NoC costs are within the error tolerance and + * a non-zero value indicates the current NoC costs are above the error + * tolerance. + */ + int check_noc_placement_costs(const t_placer_costs& costs, + double error_tolerance, + const t_noc_opts& noc_opts) const; + + /** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then finds n links that are most congested. + * + * @return n links with highest congestion ratio + */ + std::vector get_top_n_congested_links(int n); + + /** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then finds n links that are most congested. + * + * @return n highest congestion ratios + */ + std::vector get_top_n_congestion_ratios(int n); + + /** + * @brief Goes through all the traffic flows and determines whether the + * latency constraints have been met for each traffic flow. + * + * @return The total number of traffic flows with latency constraints being met + */ + int get_number_of_traffic_flows_with_latency_cons_met() const; + + /** + * @brief Goes through all NoC links and counts the congested ones. + * A congested NoC link is a link whose used bandwidth exceeds its + * bandwidth capacity. + * + * @return The total number of congested NoC links.
+ */ + int get_number_of_congested_noc_links() const; + + /** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then adds up the congestion ratio of all + * congested links. + * + * @return The total congestion ratio + */ + double get_total_congestion_bandwidth_ratio() const; + + /** + * @brief Returns the utilized bandwidth of a NoC link. + * @param link_id The Id of the NoC link whose used bandwidth is desired. + * @return The used bandwidth of the given NoC link. + */ + double get_link_used_bandwidth(NocLinkId link_id) const; + + /** + * @brief Returns the utilized bandwidth of all NoC links. + * @return A const reference to link bandwidth utilization of all links. + */ + const vtr::vector& get_link_bandwidth_usages() const; + + /** + * @brief Determines the congestion cost of a NoC link. The cost + * is calculated by measuring how much the current bandwidth + * going through the link exceeds the link's bandwidth capacity. + * + * @param link The NoC link for which the congestion cost is + * to be computed + * @return The computed congestion cost for the given NoC link. + */ + double get_link_congestion_cost(const NocLink& link) const; + + /** + * @brief This function checks whether the routing configuration for NoC traffic flows + * can cause a deadlock in NoC. Assume we create a graph where NoC routers are vertices, + * and traffic flow routes represent edges. This graph is a sub-graph of the NoC topology + * as it contains a subset of its edges. If such a graph contains a cycle, we can argue + * that deadlock is possible. + * + * This function performs a DFS over the mentioned graph and tries to find out whether + * the graph has any back edges, i.e. whether a node points to one of its ancestors + * during depth-first search traversal. + * + * @return bool Indicates whether NoC traffic flow routes form a cycle. + */ + bool noc_routing_has_cycle() const; + + /** + * @brief Prints NoC-related cost terms and metrics.
+ * + * @param header The string with which the report starts. + * @param costs Contains NoC-related cost terms. + * @param noc_opts Used to compute total NoC cost. + */ + void print_noc_costs(std::string_view header, + const t_placer_costs& costs, + const t_noc_opts& noc_opts) const; + + private: + /** + * @brief Routes a given traffic flow within the NoC based on where the + * logical cluster blocks in the traffic flow are currently placed. The + * found route is stored and returned externally. + * + * First, the hard routers blocks that represent the placed location of + * the router cluster blocks are identified. Then the traffic flow + * is routed and updated. + * + * Note that this function does not update the link bandwidth utilization. + * update_traffic_flow_link_usage() should be called after this function + * to update the link utilization for the new route. If the flow is re-routed + * because either its source or destination are moved, update_traffic_flow_link_usage() + * should be used to reduce the bandwidth utilization for the old route. + * + * @param traffic_flow_id Represents the traffic flow that needs to be routed + * @param noc_model Contains all the links and routers within the NoC. Used + * to route traffic flows within the NoC. + * @param noc_traffic_flows_storage Contains all the traffic flow information + * within the NoC. Used to get the current traffic flow information. + * @param noc_flows_router The packet routing algorithm used to route traffic + * flows within the NoC. + * @return std::vector& The found route for the traffic flow. + */ + std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocStorage& noc_model, + const NocTrafficFlows& noc_traffic_flows_storage, + NocRouting& noc_flows_router); + + /** + * @brief Updates the bandwidth usages of links found in a routed traffic flow. + * The link bandwidth usages are either incremented or decremented by the + * bandwidth of the traffic flow. 
If the traffic flow route is being deleted, + * then the link bandwidth needs to be decremented. If the traffic flow + * route has just been added then the link bandwidth needs to be incremented. + * This function needs to be called every time a traffic flow has been newly + * routed. + * + * @param traffic_flow_route The routed path for a traffic flow. This + * contains a collection of links in the NoC. + * @param inc_or_dec Determines how the bandwidths of links found + * in the traffic flow route are updated. If it is -1, the traffic flow route has + * been removed and links' bandwidth must be decremented. Otherwise, the traffic + * flow has been re-routed and its links' bandwidth should be incremented. + * @param traffic_flow_bandwidth The bandwidth of a traffic flow. This will + * be used to update bandwidth usage of the links. + */ + void update_traffic_flow_link_usage(const std::vector& traffic_flow_route, + int inc_or_dec, + double traffic_flow_bandwidth); + + /** + * @brief Goes through all the traffic flows associated to a moved + * logical router cluster block (a traffic flow is associated to a router if + * the router is either a source or sink router of the traffic flow) and + * re-routes them. The new routes are stored and the NoC cost is updated + * to reflect the moved logical router cluster block. + * + * The associated traffic flows and their newly found costs are stored in static + * data structures found in 'noc_place_utils.cpp'. The size of these + * data structures represents the total number of affected traffic flows which is + * also updated within here. + * + * @param moved_router_block_id The logical router cluster block that was moved + * to a new location during placement. + * @param noc_traffic_flows_storage Contains all the traffic flow information + * within the NoC. Used to get the traffic flows associated to logical router + * blocks. + * @param noc_model Contains all the links and routers within the NoC.
Used + * to route traffic flows within the NoC. + * @param noc_flows_router The packet routing algorithm used to route traffic + * flows within the NoC. + * @param updated_traffic_flows Keeps track of traffic flows that have been + * re-routed. Used to prevent re-routing the same traffic flow multiple times. + */ + void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, + const NocTrafficFlows& noc_traffic_flows_storage, const NocStorage& noc_model, - NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router, - const vtr::vector_map& block_locs); - -/** - * @brief Updates the bandwidth usages of links found in a routed traffic flow. - * The link bandwidth usages are either incremented or decremented by the - * bandwidth of the traffic flow. If the traffic flow route is being deleted, - * then the link bandwidth needs to be decremented. If the traffic flow - * route has just been added then the link bandwidth needs to be incremented. - * This function needs to be called everytime a traffic flow has been newly - * routed. - * - * @param traffic_flow_route The routed path for a traffic flow. This - * contains a collection of links in the NoC. - * @param noc_model Contains all the links and routers within the NoC. Used - * to update link information. - * @param inc_or_dec Determines how the bandwidths of links found - * in the traffic flow route are updated. If it is -1, the route flow has - * been removed and links' bandwidth must be decremented. Otherwise, the a traffic - * flow has been re-routed and its links' bandwidth should be incremented. - * @param traffic_flow_bandwidth The bandwidth of a traffic flow. This will - * be used to update bandwidth usage of the links. 
- */ -void update_traffic_flow_link_usage(const std::vector& traffic_flow_route, - NocStorage& noc_model, - int inc_or_dec, - double traffic_flow_bandwidth); - -/** - * @brief Goes through all the traffic flows associated to a moved - * logical router cluster block (a traffic flow is associated to a router if - * the router is either a source or sink router of the traffic flow) and - * re-routes them. The new routes are stored and the NoC cost is updated - * to reflect the moved logical router cluster block. - * - * The associated traffic flows and their newly found costs are stored in static - * datastructures found in 'noc_place_utils.cpp'. The size of these - * datastructures represent the total number of affected traffic flows which is - * also updated within here. - * - * @param moved_router_block_id The logical router cluster block that was moved - * to a new location during placement. - * @param noc_traffic_flows_storage Contains all the traffic flow information - * within the NoC. Used to get the traffic flows associated to logical router - * blocks. - * @param noc_model Contains all the links and routers within the NoC. Used - * to route traffic flows within the NoC. - * @param noc_flows_router The packet routing algorithm used to route traffic - * flows within the NoC. - * @param updated_traffic_flows Keeps track of traffic flows that have been - * re-routed. Used to prevent re-routing the same traffic flow multiple times. - * @param block_locs Contains the location where each clustered block is placed at. 
- */ -void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, - NocTrafficFlows& noc_traffic_flows_storage, - NocStorage& noc_model, - NocRouting& noc_flows_router, - std::unordered_set& updated_traffic_flows, - const vtr::vector_map& block_locs); - -/** - * @brief Used to re-route all the traffic flows associated to logical - * router blocks that were supposed to be moved during placement but are - * back to their original positions. - * - * The routing function is called to find the original traffic flow route - * again. - * - * @param blocks_affected Contains all the blocks that were moved in - * the current placement iteration. This includes the cluster ids of - * the moved blocks, their previous locations and their new locations - * after being moved. - * @param block_locs Contains the location where each clustered block is placed at. - */ -void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected, - const vtr::vector_map& block_locs); - -/** - * @brief Removes the route of a traffic flow and updates the links to indicate - * that the traffic flow does not use them. And then finds - * a new route for the traffic flow and updates the links in the new route to - * indicate that the traffic flow uses them. - * - * @param traffic_flow_id The traffic flow to re-route. - * @param noc_traffic_flows_storage Contains all the traffic flow information - * within the NoC. Used to get the current traffic flow information. - * @param noc_model Contains all the links and routers within the NoC. Used - * to route traffic flows within the NoC. - * @param noc_flows_router The packet routing algorithm used to route traffic - * flows within the NoC. - * @param block_locs Contains the location where each clustered block is placed at. 
- */ -void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, - NocTrafficFlows& noc_traffic_flows_storage, - NocStorage& noc_model, - NocRouting& noc_flows_router, - const vtr::vector_map& block_locs); - -/** - * @brief Recompute the NoC costs (aggregate bandwidth and latency) by - * accumulating the individual traffic flow costs and then verify - * whether the result is within an error tolerance of the placements - * NoC costs. - * - * During placement, the NoC aggregate bandwidth and latency costs are - * incrementally updated by adding the change in NoC costs caused by - * each accepted move. This function quickly verifies whether - * the incremental changes to the NoC costs are correct by comparing - * the current costs to newly computed costs for the current - * placement state. This function assumes the traffic flows have - * been routed and all the individual NoC costs of all traffic flows are - * correct and only accumulates them to compute the new overall NoC costs. - * - * If the comparison is larger than the error tolerance then it - * implies that the incremental cost updates were incorrect and - * an error is thrown. - * - * This function is not very expensive and can be called regularly during - * placement to ensure the NoC costs do not deviate too far off - * from their correct values. - * - * @param new_noc_aggregate_bandwidth_cost Will store the newly computed - * NoC aggregate bandwidth cost for the current placement state. - * @param new_noc_latency_cost Will store the newly computed - * NoC latency cost for the current placement state. - */ -void recompute_noc_costs(NocCostTerms& new_cost); - -/** - * @brief Updates all the cost normalization factors relevant to the NoC. - * Handles exceptional cases so that the normalization factors do not - * reach INF. 
- * This is intended to be used to initialize the normalization factors of - * the NoC and also at the outer loop iteration of placement to - * balance the NoC costs with other placement cost parameters. - * - * @param costs Contains the normalization factors which need to be updated - */ -void update_noc_normalization_factors(t_placer_costs& costs); - -/** - * @brief Calculates the aggregate bandwidth of each traffic flow in the NoC - * and initializes local variables that keep track of the traffic flow - * aggregate bandwidths cost. - * Then the total aggregate bandwidth cost is determines by summing up all - * the individual traffic flow aggregate bandwidths. - * - * This should be used after initial placement to determine the starting - * aggregate bandwidth cost of the NoC. - * - * @return double The aggregate bandwidth cost of the NoC. - */ -double comp_noc_aggregate_bandwidth_cost(); - -/** - * @brief Calculates the latency cost of each traffic flow in the NoC - * and initializes local variables that keep track of the traffic flow - * latency costs. Then the total latency cost is determined by summing up all - * the individual traffic flow latency costs. - * - * This should be used after initial placement to determine the starting latency - * cost of the NoC. - * - * @return double The latency cost of the NoC. - */ -std::pair comp_noc_latency_cost(); - -double comp_noc_congestion_cost(); - -/** - * @brief Given a placement state the NoC costs are re-computed - * from scratch and compared to the current NoC placement costs. - * This involves finding new routes for all traffic - * flows and then computing the aggregate bandwidth and latency costs - * for the traffic flows using the newly found routes. Then the - * overall NoC costs are computed by accumulating the newly found traffic flow - * costs. - * - * THe newly computed NoC costs are compared to the current NoC costs to - * check if they are within an error tolerance. 
If the comparison is - * larger than the error tolerance then an error is thrown and it indicates - * that the incremental NoC costs updates are incorrect. - * - * This function is similar to 'recompute_noc_costs' but the traffic flow - * routes and costs are computed as well. As a result this function is very - * expensive and should be used in larger intervals (number of moves) within - * the placer. - * - * @param costs Contains the current NoC aggregate bandwidth and latency costs - * which are computed incrementally after each logical router move during - * placement. - * @param error_tolerance The maximum allowable difference between the current - * NoC costs and the newly computed NoC costs. - * @param noc_opts Contains information necessary to compute the NoC costs - * from scratch. For example this would include the routing algorithm and - * weights for the difference components of the NoC costs. - * @return An integer which represents the status of the comparison. 0 - * indicates that the current NoC costs are within the error tolerance and - * a non-zero values indicates the current NoC costs are above the error - * tolerance. - * @param block_locs Contains the location where each clustered block is placed at. - */ -int check_noc_placement_costs(const t_placer_costs& costs, - double error_tolerance, - const t_noc_opts& noc_opts, - const vtr::vector_map& block_locs); + std::unordered_set& updated_traffic_flows); + + + /** + * @brief Removes the route of a traffic flow and updates the links to indicate + * that the traffic flow does not use them. And then finds + * a new route for the traffic flow and updates the links in the new route to + * indicate that the traffic flow uses them. + * + * @param traffic_flow_id The traffic flow to re-route. + * @param noc_traffic_flows_storage Contains all the traffic flow information + * within the NoC. Used to get the current traffic flow information. 
+ * @param noc_model Contains all the links and routers within the NoC. Used + * to route traffic flows within the NoC. + * @param noc_flows_router The packet routing algorithm used to route traffic + * flows within the NoC. + */ + void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocTrafficFlows& noc_traffic_flows_storage, + const NocStorage& noc_model, + NocRouting& noc_flows_router); + + public: + /** + * @brief Represents the maximum values of the NoC cost normalization factors + * @details We need to handle the case where the aggregate bandwidth is 0, + * so we set this to some arbitrary positive number that is greater than 1.e-9, + * since that is the range we expect the normalization factor to be (in Gbps) + */ + static constexpr double MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST = 1.; + + /** + * @brief Represents the lowest possible latency cost. + * @details We don't expect the noc_latency cost to ever go below 1 pico second. + */ + static constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12; + + /** + * @brief Represents the maximum possible 1/latency value. + * @details We expect the latency costs to be in the nano-second range, and we don't expect it to go lower than that. + * So if the latency costs go below the pico-second range we trim the normalization value to be no higher than 1/ps + * This should be updated if the delays become lower. + */ + static constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12; + + + /** + * @brief Represents the minimum link bandwidth over-utilization for that link to be considered congested. + * @details If a link is overloaded by less than 0.1% of the link bandwidth capacity, + * we assume it is not congested. + */ + static constexpr double MIN_EXPECTED_NOC_CONGESTION_COST = 1.e-3; + + /** + * @brief Represents the maximum value of 1/link bandwidth over-utilization for that link to be considered congested. 
+ * @details The congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity. + * We assume that when a link is congested, it is overloaded with at least 0.1% of its BW capacity + */ + static constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3; + + /** + * @brief Represents an invalid NoC cost term value. + */ + static constexpr double INVALID_NOC_COST_TERM = -1.0; + + private: + /** + * @brief Each traffic flow cost consists of three components: + * 1) traffic flow aggregate bandwidth (sum over all used links of the traffic flow bandwidth) + * 2) traffic flow latency (currently unloaded/best-case latency of the flow) + * 3) traffic flow latency overrun (how much the latency is higher than the + * latency constraint for a traffic flow). + * NoC placement code will keep an array-of-struct to easily access each + * traffic flow cost. + */ + struct TrafficFlowPlaceCost { + double aggregate_bandwidth = INVALID_NOC_COST_TERM; + double latency = INVALID_NOC_COST_TERM; + double latency_overrun = INVALID_NOC_COST_TERM; + }; + + const vtr::vector_map& block_locs_ref; + + /// Proposed and actual cost of a NoC traffic flow used for each move assessment + vtr::vector traffic_flow_costs, proposed_traffic_flow_costs; + + /// Keeps track of traffic flows that have been updated at each attempted placement move + std::vector affected_traffic_flows; + + /// Proposed and actual congestion cost of a NoC link used for each move assessment + vtr::vector link_congestion_costs, proposed_link_congestion_costs; + + /// Keeps track of NoC links whose bandwidth usage has been updated at each attempted placement move + std::unordered_set affected_noc_links; + + /** + * @brief Stores the routes that were found by the routing algorithm for + * all traffic flows within the NoC. This data structure should be used + * to store the path found whenever a traffic flow needs to be routed/ + * re-routed.
Also, this data structure should be used to access the routed + * path of a traffic flow. + */ + vtr::vector> traffic_flow_routes; + + /** + * @brief Stores previous routes for NoC routers involved in a swap that is still + * to be committed. + * @details When a NoC router is swapped, its associated traffic flows are re-routed + * to compute the new NoC-related cost terms required to evaluate the swap. If the swap + * is rejected, the swapped NoC routers is moved back to its location. This data structure + * stores the old routes before a swap is applied so that we can easily revert traffic flow + * routes without re-routing them from scratch. + */ + vtr::vector> traffic_flow_routes_backup; + + ///Represents the bandwidth of the data being transmitted on each link in the NoC. Units in bits-per-second(bps) + vtr::vector link_bandwidth_usages; +}; /** * @brief Determines the aggregate bandwidth cost of a routed traffic flow. @@ -411,17 +597,6 @@ std::pair calculate_traffic_flow_latency_cost(const std::vector< const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info); -/** - * @brief Determines the congestion cost a NoC link. The cost - * is calculating by measuring how much the current bandwidth - * going through the link exceeds the link's bandwidth capacity. - * - * @param link The NoC link for which the congestion cost is - * to be computed - * @return The computed congestion cost for the given NoC link. - */ -double calculate_link_congestion_cost(const NocLink& link); - /** * @brief The user passes weighting factors for aggregate latency * and latency overrun terms. The weighting factor for aggregate @@ -451,66 +626,7 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const NocCostTerms& norm_factors, const t_noc_opts& noc_opts); -/** - * @brief Goes through all the traffic flows and determines whether the - * latency constraints have been met for each traffic flow. 
- * - * @return The total number of traffic flows with latency constraints being met - */ -int get_number_of_traffic_flows_with_latency_cons_met(); -/** - * @brief Goes through all NoC links and counts the congested ones. - * A congested NoC link is a link whose used bandwidth exceeds its - * bandwidth capacity. - * - * @return The total number of congested NoC links. - */ -int get_number_of_congested_noc_links(); - -/** - * @brief Goes through all NoC links and determines whether they - * are congested or not. Then adds up the congestion ratio of all - * congested links. - * - * @return The total congestion ratio - */ -double get_total_congestion_bandwidth_ratio(); - -/** - * @brief Goes through all NoC links and determines whether they - * are congested or not. Then finds n links that are most congested. - * - * @return n links with highest congestion ratio - */ -std::vector get_top_n_congested_links(int n); - -/** - * @brief Goes through all NoC links and determines whether they - * are congested or not. Then finds n links that are most congested. - * - * @return n highest congestion ratios - */ -std::vector get_top_n_congestion_ratios(int n); - -/** - * @brief There are a number of static datastructures which are local - * to 'noc_place_utils.cpp'. THe purpose of these datastructures is - * to keep track of the NoC costs for all traffic flows and temporarily - * store the status of the NoC placement after each move. We create and - * initialize the datastructures here. - * - * This should be called before starting the simulated annealing placement. - */ -void allocate_and_load_noc_placement_structs(); - -/** - * @brief We delete the static datastructures which were created in - * 'allocate_and_load_noc_placement_structs'. - * - * This should be called after placement is finished. 
- */ -void free_noc_placement_structs(); /* Below are functions related to the feature that forces to the placer to swap router blocks for a certain percentage of the total number of swaps */ @@ -565,22 +681,7 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, void write_noc_placement_file(const std::string& file_name, const vtr::vector_map& block_locs); -/** - * @brief This function checks whether the routing configuration for NoC traffic flows - * can cause a deadlock in NoC. Assume we create a graph where NoC routers are vertices, - * and traffic flow routes represent edges. This graph is a sub-graph of the NoC topology - * as it contain a subset of its edges. If such a graph contains a cycle, we can argue - * that deadlock is possible. - * - * This functions performs a DFS over the mentioned graph and tries to find out whether - * the graph has any back edges, i.e. whether a node points to one of its ancestors - * during depth-first search traversal. - * - * @param block_locs Contains the location where each clustered block is placed at. - * - * @return bool Indicates whether NoC traffic flow routes form a cycle. 
- */ -bool noc_routing_has_cycle(const vtr::vector_map& block_locs); + /** * @brief Check if the channel dependency graph created from the given traffic flow routes @@ -605,13 +706,5 @@ bool noc_routing_has_cycle(const vtr::vector& noc_cost_handler); -static void free_placement_structs(const t_noc_opts& noc_opts); +static void free_placement_structs(); static e_move_result try_swap(const t_annealing_state* state, t_placer_costs* costs, @@ -210,7 +208,8 @@ static e_move_result try_swap(const t_annealing_state* state, bool manual_move_enabled, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler); + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler); static void check_place(const t_placer_costs& costs, @@ -219,7 +218,8 @@ static void check_place(const t_placer_costs& costs, const t_place_algorithm& place_algorithm, const t_noc_opts& noc_opts, PlacerState& placer_state, - NetCostHandler& net_cost_handler); + NetCostHandler& net_cost_handler, + const std::optional& noc_cost_handler); static int check_placement_costs(const t_placer_costs& costs, const PlaceDelayModel* delay_model, @@ -249,7 +249,8 @@ static float starting_t(const t_annealing_state* state, MoveTypeStat& move_type_stat, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler); + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler); static int count_connections(); @@ -269,7 +270,10 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, static e_move_result assess_swap(double delta_c, double t); -static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); +static void update_placement_cost_normalization_factors(t_placer_costs* costs, + const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + const std::optional& noc_cost_handler); static double get_total_cost(t_placer_costs* costs, const 
t_placer_opts& placer_opts, const t_noc_opts& noc_opts); @@ -284,7 +288,8 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - PlacerState& placer_state); + PlacerState& placer_state, + const std::optional& noc_cost_handler); static void placement_inner_loop(const t_annealing_state* state, const t_placer_opts& placer_opts, @@ -306,7 +311,8 @@ static void placement_inner_loop(const t_annealing_state* state, float timing_bb_factor, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler); + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler); static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, @@ -427,8 +433,17 @@ void try_place(const Netlist<>& net_list, const auto& p_timing_ctx = placer_state.timing(); const auto& p_runtime_ctx = placer_state.runtime(); - NetCostHandler net_cost_handler = alloc_and_load_placement_structs(placer_opts, noc_opts, - directs, num_directs, placer_state); + + std::optional noc_cost_handler; + // create cost handler objects + NetCostHandler net_cost_handler = alloc_and_load_placement_structs(placer_opts, noc_opts, directs, + num_directs, placer_state, noc_cost_handler); + +#ifndef NO_GRAPHICS + if (noc_cost_handler.has_value()) { + get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler->get_link_bandwidth_usages()); + } +#endif ManualMoveGenerator manual_move_generator(placer_state); @@ -438,7 +453,8 @@ void try_place(const Netlist<>& net_list, normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } - initial_placement(placer_opts, placer_opts.constraints_file.c_str(), noc_opts, blk_loc_registry); + initial_placement(placer_opts, placer_opts.constraints_file.c_str(), + noc_opts, blk_loc_registry, noc_cost_handler); //create the move generator based on the chosen 
strategy auto [move_generator, move_generator2] = create_move_generators(placer_state, placer_opts, move_lim, noc_opts.noc_centroid_weight); @@ -566,13 +582,15 @@ void try_place(const Netlist<>& net_list, } if (noc_opts.noc) { + VTR_ASSERT(noc_cost_handler.has_value()); + // get the costs associated with the NoC - costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); - std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost(); - costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); + costs.noc_cost_terms.aggregate_bandwidth = noc_cost_handler->comp_noc_aggregate_bandwidth_cost(); + std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = noc_cost_handler->comp_noc_latency_cost(); + costs.noc_cost_terms.congestion = noc_cost_handler->comp_noc_congestion_cost(); // initialize all the noc normalization factors - update_noc_normalization_factors(costs); + noc_cost_handler->update_noc_normalization_factors(costs); } // set the starting total placement cost @@ -585,13 +603,16 @@ void try_place(const Netlist<>& net_list, placer_opts.place_algorithm, noc_opts, placer_state, - net_cost_handler); + net_cost_handler, + noc_cost_handler); //Initial placement statistics VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); if (noc_opts.noc) { - print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); + VTR_ASSERT(noc_cost_handler.has_value()); + + noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); } if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG( @@ -681,7 +702,7 @@ void try_place(const Netlist<>& net_list, placer_setup_slacks.get(), timing_info.get(), *move_generator, manual_move_generator, pin_timing_invalidator.get(), blocks_affected, placer_opts, noc_opts, move_type_stat, - swap_stats, placer_state, net_cost_handler); + swap_stats, placer_state, net_cost_handler, 
noc_cost_handler); if (!placer_opts.move_stats_file.empty()) { f_move_stats_file = std::unique_ptr( @@ -724,7 +745,7 @@ void try_place(const Netlist<>& net_list, state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state); + timing_info.get(), placer_state, noc_cost_handler); if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); @@ -754,8 +775,9 @@ void try_place(const Netlist<>& net_list, *current_move_generator, manual_move_generator, blocks_affected, timing_info.get(), placer_opts.place_algorithm, move_type_stat, - timing_bb_factor, swap_stats, - placer_state, net_cost_handler); + timing_bb_factor, swap_stats, placer_state, + net_cost_handler, noc_cost_handler); + //move the update used move_generator to its original variable update_move_generator(move_generator, move_generator2, agent_state, @@ -805,7 +827,7 @@ void try_place(const Netlist<>& net_list, state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state); + timing_info.get(), placer_state, noc_cost_handler); //move the appropriate move_generator to be the current used move generator assign_current_move_generator(move_generator, move_generator2, @@ -821,8 +843,8 @@ void try_place(const Netlist<>& net_list, *current_move_generator, manual_move_generator, blocks_affected, timing_info.get(), placer_opts.place_quench_algorithm, move_type_stat, - timing_bb_factor, - swap_stats, placer_state, net_cost_handler); + timing_bb_factor, swap_stats, placer_state, + net_cost_handler, noc_cost_handler); //move the update used move_generator to its original variable @@ -862,7 +884,7 @@ void try_place(const Netlist<>& net_list, restore_best_placement(placer_state, placement_checkpoint, timing_info, costs, 
placer_criticalities, placer_setup_slacks, place_delay_model, - pin_timing_invalidator, crit_params, noc_opts); + pin_timing_invalidator, crit_params, noc_cost_handler); if (placer_opts.placement_saves_per_temperature >= 1) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", @@ -891,7 +913,8 @@ void try_place(const Netlist<>& net_list, placer_opts.place_algorithm, noc_opts, placer_state, - net_cost_handler); + net_cost_handler, + noc_cost_handler); //Some stats VTR_LOG("\n"); @@ -934,7 +957,8 @@ void try_place(const Netlist<>& net_list, costs.bb_cost, costs.timing_cost); // print the noc costs info if (noc_opts.noc) { - print_noc_costs("\nNoC Placement Costs", costs, noc_opts); + VTR_ASSERT(noc_cost_handler.has_value()); + noc_cost_handler->print_noc_costs("\nNoC Placement Costs", costs, noc_opts); #ifdef ENABLE_NOC_SAT_ROUTING if (costs.noc_cost_terms.congestion > 0.0) { @@ -956,7 +980,7 @@ void try_place(const Netlist<>& net_list, write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); } - free_placement_structs(noc_opts); + free_placement_structs(); print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); @@ -984,7 +1008,8 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - PlacerState& placer_state) { + PlacerState& placer_state, + const std::optional& noc_cost_handler) { if (placer_opts.place_algorithm.is_timing_driven()) { /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ @@ -1010,7 +1035,7 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, } /* Update the cost normalization factors */ - update_placement_cost_normalization_factors(costs, placer_opts, noc_opts); + update_placement_cost_normalization_factors(costs, 
placer_opts, noc_opts, noc_cost_handler); } /* Function which contains the inner loop of the simulated annealing */ @@ -1034,7 +1059,8 @@ static void placement_inner_loop(const t_annealing_state* state, float timing_bb_factor, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler) { + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler) { //How many times have we dumped placement to a file this temperature? int inner_placement_save_count = 0; @@ -1049,7 +1075,7 @@ static void placement_inner_loop(const t_annealing_state* state, blocks_affected, delay_model, criticalities, setup_slacks, placer_opts, noc_opts, move_type_stat, place_algorithm, timing_bb_factor, manual_move_enabled, swap_stats, - placer_state, net_cost_handler); + placer_state, net_cost_handler, noc_cost_handler); if (swap_result == ACCEPTED) { /* Move was accepted. Update statistics that are useful for the annealing schedule. */ @@ -1092,9 +1118,12 @@ static void placement_inner_loop(const t_annealing_state* state, */ ++(*moves_since_cost_recompute); if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { - //VTR_LOG("recomputing costs from scratch, old bb_cost is %g\n", costs->bb_cost); - net_cost_handler.recompute_costs_from_scratch(noc_opts, delay_model, criticalities, costs); - //VTR_LOG("new_bb_cost is %g\n", costs->bb_cost); + net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs); + + if (noc_cost_handler.has_value()) { + noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs); + } + *moves_since_cost_recompute = 0; } @@ -1147,7 +1176,8 @@ static float starting_t(const t_annealing_state* state, MoveTypeStat& move_type_stat, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler) { + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler) { if (annealing_sched.type == USER_SCHED) { return (annealing_sched.init_t); } @@ -1181,7 +1211,7 @@ static float 
starting_t(const t_annealing_state* state, blocks_affected, delay_model, criticalities, setup_slacks, placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm, REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, swap_stats, - placer_state, net_cost_handler); + placer_state, net_cost_handler, noc_cost_handler); if (swap_result == ACCEPTED) { @@ -1254,15 +1284,14 @@ static e_move_result try_swap(const t_annealing_state* state, bool manual_move_enabled, t_swap_stats& swap_stats, PlacerState& placer_state, - NetCostHandler& net_cost_handler) { + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler) { /* Picks some block and moves it to another spot. If this spot is * * occupied, switch the blocks. Assess the change in cost function. * * rlim is the range limiter. * * Returns whether the swap is accepted, rejected or aborted. * * Passes back the new value of the cost functions. */ - auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); - const auto& block_locs = placer_state.block_locs(); float rlim_escape_fraction = placer_opts.rlim_escape_fraction; float timing_tradeoff = placer_opts.timing_tradeoff; @@ -1418,9 +1447,10 @@ static e_move_result try_swap(const t_annealing_state* state, } NocCostTerms noc_delta_c; // change in NoC cost - /* Update the NoC datastructure and costs*/ + /* Update the NoC data structure and costs*/ if (noc_opts.noc) { - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, block_locs); + VTR_ASSERT_SAFE(noc_cost_handler.has_value()); + noc_cost_handler->find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); // Include the NoC delta costs in the total cost change for this swap delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts); @@ -1472,8 +1502,8 @@ static e_move_result try_swap(const t_annealing_state* state, if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type 
stat ++move_type_stat.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; } - if (noc_opts.noc) { - commit_noc_costs(); + if (noc_opts.noc){ + noc_cost_handler->commit_noc_costs(); *costs += noc_delta_c; } @@ -1525,7 +1555,7 @@ static e_move_result try_swap(const t_annealing_state* state, } /* Revert the traffic flow routes within the NoC*/ if (noc_opts.noc) { - revert_noc_traffic_flow_routes(blocks_affected, block_locs); + noc_cost_handler->revert_noc_traffic_flow_routes(blocks_affected); } } @@ -1604,14 +1634,18 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, * @param costs Contains the normalization factors which need to be updated * @param placer_opts Determines the placement mode * @param noc_opts Determines if placement includes the NoC + * @param noc_cost_handler Computes normalization factors for NoC-related cost terms */ -static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { +static void update_placement_cost_normalization_factors(t_placer_costs* costs, + const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + const std::optional& noc_cost_handler) { /* Update the cost normalization factors */ costs->update_norm_factors(); // update the noc normalization factors if the placement includes the NoC if (noc_opts.noc) { - update_noc_normalization_factors(*costs); + noc_cost_handler->update_noc_normalization_factors(*costs); } // update the current total placement cost @@ -1812,7 +1846,8 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac const t_noc_opts& noc_opts, t_direct_inf* directs, int num_directs, - PlacerState& placer_state) { + PlacerState& placer_state, + std::optional& noc_cost_handler) { const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -1881,27 +1916,22 @@ static 
NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac place_ctx.pl_macros = alloc_and_load_placement_macros(directs, num_directs); + place_ctx.compressed_block_grids = create_compressed_block_grids(); + if (noc_opts.noc) { - allocate_and_load_noc_placement_structs(); + noc_cost_handler.emplace(placer_state.block_locs()); } - - place_ctx.compressed_block_grids = create_compressed_block_grids(); - - return {placer_opts, placer_state, num_nets, place_ctx.cube_bb}; + return NetCostHandler{placer_opts, placer_state, num_nets, place_ctx.cube_bb}; } /* Frees the major structures needed by the placer (and not needed * * elsewhere). */ -static void free_placement_structs(const t_noc_opts& noc_opts) { +static void free_placement_structs() { free_placement_macros_structs(); auto& place_ctx = g_vpr_ctx.mutable_placement(); vtr::release_memory(place_ctx.compressed_block_grids); - - if (noc_opts.noc) { - free_noc_placement_structs(); - } } static void check_place(const t_placer_costs& costs, @@ -1910,7 +1940,8 @@ static void check_place(const t_placer_costs& costs, const t_place_algorithm& place_algorithm, const t_noc_opts& noc_opts, PlacerState& placer_state, - NetCostHandler& net_cost_handler) { + NetCostHandler& net_cost_handler, + const std::optional& noc_cost_handler) { /* Checks that the placement has not confused our data structures. * * i.e. the clb and block structures agree about the locations of * * every block, blocks are in legal spots, etc. 
Also recomputes * @@ -1925,9 +1956,9 @@ static void check_place(const t_placer_costs& costs, if (noc_opts.noc) { // check the NoC costs during placement if the user is using the NoC supported flow - error += check_noc_placement_costs(costs, ERROR_TOL, noc_opts, placer_state.block_locs()); + error += noc_cost_handler->check_noc_placement_costs(costs, PL_INCREMENTAL_COST_TOLERANCE, noc_opts); // make sure NoC routing configuration does not create any cycles in CDG - error += (int)noc_routing_has_cycle(placer_state.block_locs()); + error += (int)noc_cost_handler->noc_routing_has_cycle(); } if (error == 0) { @@ -1953,7 +1984,7 @@ static int check_placement_costs(const t_placer_costs& costs, double bb_cost_check = net_cost_handler.comp_bb_cost(e_cost_methods::CHECK); - if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * ERROR_TOL) { + if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", bb_cost_check, costs.bb_cost); @@ -1963,7 +1994,7 @@ static int check_placement_costs(const t_placer_costs& costs, if (place_algorithm.is_timing_driven()) { comp_td_costs(delay_model, *criticalities, placer_state, &timing_cost_check); //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); - if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * ERROR_TOL) { + if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", timing_cost_check, costs.timing_cost); @@ -2276,6 +2307,6 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { #ifndef NO_GRAPHICS // update the graphics' reference to placement location variables - set_graphics_blk_loc_registry_ref(global_blk_loc_registry); + get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); #endif } diff --git 
a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp index ed13382a95f..85f4ab28e18 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -45,7 +45,7 @@ void restore_best_placement(PlacerState& placer_state, std::unique_ptr& place_delay_model, std::unique_ptr& pin_timing_invalidator, PlaceCritParams crit_params, - const t_noc_opts& noc_opts) { + std::optional& noc_cost_handler) { /* The (valid) checkpoint is restored if the following conditions are met: * 1) The checkpoint has a lower critical path delay. * 2) The checkpoint's wire-length cost is either better than the current solution, @@ -74,8 +74,9 @@ void restore_best_placement(PlacerState& placer_state, * internal data structures that are used to keep track of each flow's cost are no longer valid, * and need to be re-computed from scratch. */ - if (noc_opts.noc) { - reinitialize_noc_routing(costs, {}, placer_state.block_locs()); + if (noc_cost_handler.has_value()) { + VTR_ASSERT(noc_cost_handler->points_to_same_block_locs(placer_state.block_locs())); + noc_cost_handler->reinitialize_noc_routing(costs, {}); } VTR_LOG("\nCheckpoint restored\n"); diff --git a/vpr/src/place/place_checkpoint.h b/vpr/src/place/place_checkpoint.h index cd197beb5bb..8c2313e7117 100644 --- a/vpr/src/place/place_checkpoint.h +++ b/vpr/src/place/place_checkpoint.h @@ -11,6 +11,8 @@ #include "place_delay_model.h" #include "place_timing_update.h" +class NocCostHandler; + /** * @brief Data structure that stores the placement state and saves it as a checkpoint. 
* @@ -74,5 +76,6 @@ void restore_best_placement(PlacerState& placer_state, std::unique_ptr& placer_setup_slacks, std::unique_ptr& place_delay_model, std::unique_ptr& pin_timing_invalidator, - PlaceCritParams crit_params, const t_noc_opts& noc_opts); + PlaceCritParams crit_params, + std::optional& noc_cost_handler); #endif diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 3541ef01bf1..df96e911d37 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -55,7 +55,7 @@ void t_placer_costs::update_norm_factors() { timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); } else { VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); - bb_cost_norm = 1 / bb_cost; //Upading the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost + bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost } } diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 934d2072251..9c3714b0488 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -14,7 +14,11 @@ #include "vtr_vector_map.h" #include "globals.h" - +/** + * @brief The error tolerance due to round off for the total cost computation. + * When we check it from scratch vs. incrementally. 0.01 means that there is a 1% error tolerance. 
+ */ +constexpr double PL_INCREMENTAL_COST_TOLERANCE = .01; // forward declaration of t_placer_costs so that it can be used an argument // in NocCostTerms constructor diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 1764de7c78f..06fb05911a8 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -163,7 +163,6 @@ static int get_longest_segment_length(std::vector& segment_inf); static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); - static float find_neighboring_average(vtr::NdMatrix& matrix, int from_layer, t_physical_tile_loc to_tile_loc, @@ -873,7 +872,7 @@ static vtr::NdMatrix compute_delta_delays( } float delay_reduce(std::vector& delays, e_reducer reducer) { - if (delays.size() == 0) { + if (delays.empty()) { return IMPOSSIBLE_DELTA; } else if (delays.size() == 1) { return delays[0]; @@ -921,15 +920,13 @@ static float find_neighboring_average( int endx = matrix.end_index(2); int endy = matrix.end_index(3); - int delx, dely; - int x = to_tile_loc.x; int y = to_tile_loc.y; int to_layer = to_tile_loc.layer_num; for (int distance = 1; distance <= max_distance; ++distance) { - for (delx = x - distance; delx <= x + distance; delx++) { - for (dely = y - distance; dely <= y + distance; dely++) { + for (int delx = x - distance; delx <= x + distance; delx++) { + for (int dely = y - distance; dely <= y + distance; dely++) { // Check distance constraint if (abs(delx - x) + abs(dely - y) > distance) { continue; @@ -963,7 +960,6 @@ static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // would return a result, so we fill in the empty holes with a small // neighbour average. 
constexpr int kMaxAverageDistance = 2; - for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { @@ -998,7 +994,7 @@ static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) } static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { - // Set any impossible delta's to the average of it's neighbours + // Set any impossible delta's to the average of its neighbours // // Impossible coordinates may occur if an IPIN cannot be reached from the // sampling OPIN. This might occur if the IPIN or OPIN used for sampling @@ -1008,9 +1004,8 @@ static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { // // A max average distance of 5 is used to provide increased effort in // filling these gaps. It is more important to have a poor predication, - // than a invalid value and causing a slack assertion. + // than an invalid value and causing a slack assertion. 
constexpr int kMaxAverageDistance = 5; - for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { diff --git a/vpr/test/test_bfs_routing.cpp b/vpr/test/test_bfs_routing.cpp index c9e527d7772..c0c03a9ad1d 100644 --- a/vpr/test/test_bfs_routing.cpp +++ b/vpr/test/test_bfs_routing.cpp @@ -22,7 +22,7 @@ TEST_CASE("test_route_flow", "[vpr_noc_bfs_routing]") { * */ - // Create the NoC datastructure + // Create the NoC data structure NocStorage noc_model; // store the reference to device grid with diff --git a/vpr/test/test_clustered_netlist.cpp b/vpr/test/test_clustered_netlist.cpp index f3094fa2b7a..cd0059c173e 100644 --- a/vpr/test/test_clustered_netlist.cpp +++ b/vpr/test/test_clustered_netlist.cpp @@ -23,7 +23,7 @@ TEST_CASE("test_find_block_with_matching_name", "[vpr_clustered_netlist]") { t_pb router_pb; t_pb i_o_pb; - // datastructure to keep track of blocks name to its id + // data structure to keep track of blocks name to its id std::map block_id_from_name; // need to create the cluster netlist object that will hold the blocks @@ -37,7 +37,7 @@ TEST_CASE("test_find_block_with_matching_name", "[vpr_clustered_netlist]") { char io_port_three[] = "io_port_three"; char io_port_four[] = "io_port_four"; - // datastructure to store all the cluster block IDs of the noc router logical block type clusters + // data structure to store all the cluster block IDs of the noc router logical block type clusters std::vector noc_router_logical_type_clusters; // add the io blocks to the netlist diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp index bcbba60a45e..59e8ca70cf2 100644 --- a/vpr/test/test_noc_place_utils.cpp +++ b/vpr/test/test_noc_place_utils.cpp @@ -26,12 +26,12 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // 
this sets the range of possible bandwidths for a traffic flow std::uniform_int_distribution dist_2(0, 1000); - // get global datastructures + // get global data structures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& block_locs = place_ctx.mutable_block_locs(); - // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) + // start by deleting any global data structures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); block_locs.clear(); @@ -112,7 +112,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { -1, hard_router_block.get_router_layer_position()); - // now add the cluster and its placed location to the placement datastructures + // now add the cluster and its placed location to the placement data structures block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } @@ -160,9 +160,11 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // create a local routing algorithm for the unit test auto routing_algorithm = std::make_unique(); + vtr::vector> traffic_flow_routes(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); - std::vector& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_mutable_traffic_flow_route((NocTrafficFlowId)traffic_flow_number); + std::vector& traffic_flow_route = traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number]; // get the source and sink routers of this traffic flow int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id; @@ -185,17 +187,19 @@ 
TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // now go through the routed traffic flows and update the bandwidths of the links. Once a traffic flow has been processed, we need to clear it so that the test function can update it with the routes it finds for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); - std::vector& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_mutable_traffic_flow_route((NocTrafficFlowId)traffic_flow_number); + std::vector& traffic_flow_route = traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number]; - for (auto& link : traffic_flow_route) { + for (const NocLinkId link : traffic_flow_route) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; } traffic_flow_route.clear(); } + NocCostHandler noc_cost_handler(block_locs); + // now call the test function - initial_noc_routing({}, block_locs); + noc_cost_handler.initial_noc_routing({}); // now verify the function by comparing the link bandwidths in the noc model (should have been updated by the test function) to the golden set int number_of_links = golden_link_bandwidths.size(); @@ -206,9 +210,8 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { double golden_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - noc_link_bandwidth, 0.0); double golden_congested_bw_ratio = golden_congested_bandwidth / noc_link_bandwidth; - REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage()); - REQUIRE(golden_congested_bandwidth == current_link.get_congested_bandwidth()); - REQUIRE(golden_congested_bw_ratio == current_link.get_congested_bandwidth_ratio()); + REQUIRE(golden_link_bandwidths[current_link_id] == noc_cost_handler.get_link_used_bandwidth(current_link_id)); + 
REQUIRE(golden_congested_bw_ratio == noc_cost_handler.get_link_congestion_cost(current_link)); } } @@ -224,12 +227,12 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { std::uniform_real_distribution dist_3(1, 25); std::default_random_engine double_engine; - // get global datastructures + // get global data structures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& block_locs = place_ctx.mutable_block_locs(); - // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) + // start by deleting any global data structures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); block_locs.clear(); @@ -310,7 +313,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { -1, hard_router_block.get_router_layer_position()); - // now add the cluster and its placed location to the placement datastructures + // now add the cluster and its placed location to the placement data structures block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } @@ -355,7 +358,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { noc_ctx.noc_traffic_flows_storage.finished_noc_traffic_flows_setup(); - // need to route all the traffic flows so create a datastructure to store them here + // need to route all the traffic flows so create a data structure to store them here std::vector golden_traffic_flow_route_sizes; golden_traffic_flow_route_sizes.resize(number_of_created_traffic_flows); @@ -370,7 +373,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { const t_noc_traffic_flow& curr_traffic_flow = 
noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); - std::vector& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_mutable_traffic_flow_route((NocTrafficFlowId)traffic_flow_number); + std::vector traffic_flow_route; // get the source and sink routers of this traffic flow int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id; @@ -384,18 +387,12 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // store the number of links in the traffic flow golden_traffic_flow_route_sizes[traffic_flow_number] = traffic_flow_route.size(); - - // delete this traffic flow since we need to have it be routed by the global datastructures - traffic_flow_route.clear(); } - // assume this works - // this is needed to set up the global noc packet router and also global datastructures - initial_noc_routing({}, block_locs); - SECTION("test_comp_noc_aggregate_bandwidth_cost") { - //initialize all the cost calculator datastructures - allocate_and_load_noc_placement_structs(); + //initialize all the cost calculator data structures + NocCostHandler noc_cost_handler(block_locs); + noc_cost_handler.initial_noc_routing({}); // create local variable to store the bandwidth cost double golden_total_noc_bandwidth_costs = 0.; @@ -412,19 +409,17 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { } // run the test function and get the bandwidth calculated - double found_bandwidth_cost = comp_noc_aggregate_bandwidth_cost(); + double found_bandwidth_cost = noc_cost_handler.comp_noc_aggregate_bandwidth_cost(); // compare the test function bandwidth cost to the golden value // since we are comparing double numbers we allow a tolerance of difference REQUIRE(vtr::isclose(golden_total_noc_bandwidth_costs, found_bandwidth_cost)); - - // release the cost calculator datastructures - free_noc_placement_structs(); } SECTION("test_comp_noc_latency_cost") { - //initialize all the cost 
calculator datastructures - allocate_and_load_noc_placement_structs(); + //initialize all the cost calculator data structures + NocCostHandler noc_cost_handler(block_locs); + noc_cost_handler.initial_noc_routing({}); // create local variable to store the latency cost terms double golden_total_noc_latency_costs = 0.; @@ -450,39 +445,33 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { } // run the test function and get the latency cost calculated - auto [found_latency_cost, found_latency_overrun_cost] = comp_noc_latency_cost(); + auto [found_latency_cost, found_latency_overrun_cost] = noc_cost_handler.comp_noc_latency_cost(); // compare the test function latency cost to the golden value // since we are comparing double numbers we allow a tolerance of difference REQUIRE(vtr::isclose(golden_total_noc_latency_costs, found_latency_cost)); REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_costs, found_latency_overrun_cost)); - - // release the cost calculator datastructures - free_noc_placement_structs(); } SECTION("test_comp_noc_congestion_cost") { - //initialize all the cost calculator datastructures - allocate_and_load_noc_placement_structs(); + //initialize all the cost calculator data structures + NocCostHandler noc_cost_handler(block_locs); + noc_cost_handler.initial_noc_routing({}); // create local variable to store the latency cost double golden_total_noc_congestion_costs = 0.; for (const auto& link : noc_ctx.noc_model.get_noc_links()) { - double congested_bw_ratio = link.get_congested_bandwidth_ratio(); - + double congested_bw_ratio = noc_cost_handler.get_link_congestion_cost(link); golden_total_noc_congestion_costs += congested_bw_ratio; } // run the test function to get the congestion cost - double found_congestion_cost = comp_noc_congestion_cost(); + double found_congestion_cost = noc_cost_handler.comp_noc_congestion_cost(); // compare the test function congestion cost to the golden value // since we are comparing double numbers we 
allow a tolerance of difference REQUIRE(vtr::isclose(golden_total_noc_congestion_costs, found_congestion_cost)); - - // release the cost calculator datastructures - free_noc_placement_structs(); } } @@ -499,12 +488,12 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ std::uniform_real_distribution dist_3(1, 25); std::default_random_engine double_engine; - // get global datastructures + // get global data structures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& block_locs = place_ctx.mutable_block_locs(); - // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) + // start by deleting any global data structures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); block_locs.clear(); @@ -594,7 +583,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); - // now add the cluster and its placed location to the placement datastructures + // now add the cluster and its placed location to the placement data structures block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } @@ -677,13 +666,14 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ noc_ctx.noc_model); } + NocCostHandler noc_cost_handler(block_locs); // assume this works - // this is needed to set up the global noc packet router and also global datastructures - initial_noc_routing({}, block_locs); + // this is needed to set up the global noc packet router and also global data structures + noc_cost_handler.initial_noc_routing({}); - // datastructure below will store the bandwidth usages of all the links + // data structure below will store the bandwidth usages of all the links // and will be updated throughout 
this test. - // These link bandwidths will then be compared to link bandwidths in the NoC datastructure (these will be updated in the test function) + // These link bandwidths will then be compared to link bandwidths in the NoC data structure (these will be updated in the test function) vtr::vector golden_link_bandwidths; golden_link_bandwidths.resize(noc_ctx.noc_model.get_noc_links().size(), 0.0); @@ -721,18 +711,15 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ test_noc_costs.congestion += golden_link_congestion_costs[link_id]; } - // initialize noc placement structs - allocate_and_load_noc_placement_structs(); - // We need to run these functions as they initialize local variables needed to run the test function within this unit test. we assume this is correct - comp_noc_aggregate_bandwidth_cost(); - comp_noc_latency_cost(); - comp_noc_congestion_cost(); + noc_cost_handler.comp_noc_aggregate_bandwidth_cost(); + noc_cost_handler.comp_noc_latency_cost(); + noc_cost_handler.comp_noc_congestion_cost(); - // datastructure that keeps track of moved blocks during placement + // data structure that keeps track of moved blocks during placement t_pl_blocks_to_be_moved blocks_affected(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST); - // datastructure that keeps track of all the traffic flows that have been re-routed + // data structure that keeps track of all the traffic flows that have been re-routed std::unordered_set routed_traffic_flows; /* Now we imitate placement here by swapping two clusters block @@ -750,7 +737,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ swap_router_block_two = (ClusterBlockId)dist(rand_num_gen); } while (swap_router_block_one == swap_router_block_two); - //set up the moved blocks datastructure for the test function + //set up the moved blocks data structure for the test function blocks_affected.moved_blocks.resize(2); blocks_affected.moved_blocks[0].block_num = 
swap_router_block_one; @@ -774,11 +761,9 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on - NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; - router_where_cluster_is_placed[swap_router_block_one] = router_where_cluster_is_placed[swap_router_block_two]; - router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); - // now move the blocks in the placement datastructures + // now move the blocks in the placement data structures block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; @@ -869,7 +854,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ NocCostTerms delta_cost; // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs); + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); // update the test noc cost terms based on the cost changes found by the test functions test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; @@ -877,8 +862,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ test_noc_costs.latency_overrun += delta_cost.latency_overrun; test_noc_costs.congestion += delta_cost.congestion; - // need this function to update the local datastructures that store all the traffic flow costs - commit_noc_costs(); + // need this function to update the local data structures that store all the traffic flow costs + 
noc_cost_handler.commit_noc_costs(); // clear the affected blocks blocks_affected.clear_move_blocks(); @@ -905,7 +890,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ ClusterBlockId swap_router_block_two = chosen_traffic_flow.source_router_cluster_id; // now perform the swap - //set up the moved blocks datastructure for the test function + //set up the moved blocks data structure for the test function blocks_affected.moved_blocks.resize(2); blocks_affected.moved_blocks[0].block_num = swap_router_block_one; @@ -934,7 +919,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed[swap_router_block_one] = router_where_cluster_is_placed[swap_router_block_two]; router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; - // now move the blocks in the placement datastructures + // now move the blocks in the placement data structures block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; @@ -1017,7 +1002,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ NocCostTerms delta_cost; // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs); + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); // update the test noc cost terms based on the cost changes found by the test functions test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; @@ -1025,8 +1010,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ test_noc_costs.latency_overrun += delta_cost.latency_overrun; test_noc_costs.congestion += delta_cost.congestion; - // need this function to update the local datastructures that store all the traffic flow costs - commit_noc_costs(); + // need this function to update 
the local data structures that store all the traffic flow costs + noc_cost_handler.commit_noc_costs(); // clear the affected blocks blocks_affected.clear_move_blocks(); @@ -1041,7 +1026,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ swap_router_block_two = (ClusterBlockId)(NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST - 4); // now perform the swap - //set up the moved blocks datastructure for the test function + //set up the moved blocks data structure for the test function blocks_affected.moved_blocks.resize(2); blocks_affected.moved_blocks[0].block_num = swap_router_block_one; @@ -1070,7 +1055,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed[swap_router_block_one] = router_where_cluster_is_placed[swap_router_block_two]; router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; - // now move the blocks in the placement datastructures + // now move the blocks in the placement data structures block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; @@ -1118,7 +1103,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ delta_cost = NocCostTerms(); // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs); + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); // update the test noc cost terms based on the cost changes found by the test functions test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; @@ -1126,8 +1111,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ test_noc_costs.latency_overrun += delta_cost.latency_overrun; test_noc_costs.congestion += delta_cost.congestion; - // need this function to update the local datastructures that store all 
the traffic flow costs - commit_noc_costs(); + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.commit_noc_costs(); // clear the affected blocks blocks_affected.clear_move_blocks(); @@ -1144,7 +1129,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ swap_router_block_two = (ClusterBlockId)(NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST - 2); // now perform the swap - //set up the moved blocks datastructure for the test function + //set up the moved blocks data structure for the test function blocks_affected.moved_blocks.resize(2); blocks_affected.moved_blocks[0].block_num = swap_router_block_one; @@ -1173,7 +1158,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed[swap_router_block_one] = router_where_cluster_is_placed[swap_router_block_two]; router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; - // now move the blocks in the placement datastructures + // now move the blocks in the placement data structures block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; @@ -1183,7 +1168,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ delta_cost = NocCostTerms(); // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs); + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); // update the test noc cost terms based on the cost changes found by the test functions test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; @@ -1191,8 +1176,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ test_noc_costs.latency_overrun += delta_cost.latency_overrun; test_noc_costs.congestion += 
delta_cost.congestion; - // need this function to update the local datastructures that store all the traffic flow costs - commit_noc_costs(); + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.commit_noc_costs(); // clear the affected blocks blocks_affected.clear_move_blocks(); @@ -1206,9 +1191,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double golden_link_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - link_bandwidth, 0.0); double golden_link_congested_bandwidth_ratio = golden_link_congested_bandwidth / link_bandwidth; - REQUIRE(golden_link_bandwidth == current_link.get_bandwidth_usage()); - REQUIRE(golden_link_congested_bandwidth == current_link.get_congested_bandwidth()); - REQUIRE(golden_link_congested_bandwidth_ratio == current_link.get_congested_bandwidth_ratio()); + REQUIRE(golden_link_bandwidth == noc_cost_handler.get_link_used_bandwidth(current_link_id)); + REQUIRE(golden_link_congested_bandwidth_ratio == noc_cost_handler.get_link_congestion_cost(current_link)); } // now find the total expected noc cost terms @@ -1229,7 +1213,6 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference) REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); - std::cout << golden_total_noc_latency_overrun_cost << " " << test_noc_costs.latency_overrun << std::endl; REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); @@ -1243,132 +1226,20 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, 
test_commit_noc_ test_noc_costs.congestion = 0.; // now execute the test function - recompute_noc_costs(test_noc_costs); + test_noc_costs = noc_cost_handler.recompute_noc_costs(); // now verify REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); - - // delete local datastructures - free_noc_placement_structs(); } -TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") { - // creating local parameters needed for the test - t_placer_costs costs; - t_placer_opts placer_opts; - - SECTION("Test case where the bandwidth cost is 0") { - costs.noc_cost_terms.aggregate_bandwidth = 0.; - costs.noc_cost_terms.latency = 1.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the aggregate bandwidth normalized cost - // this should not be +INF and instead trimmed - REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.0); - } - SECTION("Test case where the latency cost is 0") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 0.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the latency normalized cost - // this should not be +INF and instead trimmed - REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12); - } - SECTION("Test case where the bandwidth cost is an expected value") { - costs.noc_cost_terms.aggregate_bandwidth = 1.e9; - costs.noc_cost_terms.latency = 1.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.; - - // run the test function - 
update_noc_normalization_factors(costs); - - // verify the aggregate bandwidth normalized cost - // this should not be trimmed - REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.e-9); - } - SECTION("Test case where the latency cost is an expected value") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 50.e-12; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the latency normalized cost - // this should not be trimmed - REQUIRE(costs.noc_cost_norm_factors.latency == 2.e10); - } - SECTION("Test case where the latency cost is lower than the smallest expected value") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 999.e-15; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the latency normalized cost - // this should not be trimmed - REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12); - } - SECTION("Test case where the congestion cost is zero") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 1.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 0.; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the congestion normalization factor - // this should not be infinite - REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3); - } - SECTION("Test case where the congestion cost is lower than the smallest expected value") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 1.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 999.e-15; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the congestion normalization factor - // this should not be infinite 
- REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3); - } - SECTION("Test case where the congestion cost is an expected value") { - costs.noc_cost_terms.aggregate_bandwidth = 1.; - costs.noc_cost_terms.latency = 1.; - costs.noc_cost_terms.latency_overrun = 1.; - costs.noc_cost_terms.congestion = 1.e2; - - // run the test function - update_noc_normalization_factors(costs); - - // verify the congestion normalization factor - REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e-2); - } -} TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { // setup random number generation std::random_device device; std::mt19937 rand_num_gen(device()); - std::uniform_int_distribution dist(0, NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST - 1); + std::uniform_int_distribution dist(0, NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST - 3); // this sets the range of possible bandwidths for a traffic flow std::uniform_int_distribution dist_2(0, 1000); // this sets the range of possible priorities @@ -1377,12 +1248,12 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { std::uniform_real_distribution dist_3(1, 25); std::default_random_engine double_engine; - // get global datastructures + // get global data structures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& block_locs = place_ctx.mutable_block_locs(); - // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) + // start by deleting any global data structures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); block_locs.clear(); @@ -1403,14 +1274,16 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { noc_opts.noc_latency_weighting = dist_3(double_engine); noc_opts.noc_congestion_weighting = dist_3(double_engine); - constexpr double LINK_LATENCY = 1; - constexpr 
double LINK_BANDWIDTH = 1; - constexpr double ROUTER_LATENCY = 1; + constexpr double link_bandwidth = 1.0; // setting the NoC parameters - noc_ctx.noc_model.set_noc_link_latency(LINK_BANDWIDTH); - noc_ctx.noc_model.set_noc_router_latency(ROUTER_LATENCY); - noc_ctx.noc_model.set_noc_link_bandwidth(LINK_BANDWIDTH); + noc_ctx.noc_model.set_noc_link_latency(1); + noc_ctx.noc_model.set_noc_router_latency(1); + noc_ctx.noc_model.set_noc_link_bandwidth(link_bandwidth); + + // needs to be the same as above + const double router_latency = noc_ctx.noc_model.get_noc_router_latency(); + const double link_latency = noc_ctx.noc_model.get_noc_link_latency(); // keeps track of which hard router each cluster block is placed vtr::vector router_where_cluster_is_placed; @@ -1428,7 +1301,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { router_grid_position_x, router_grid_position_y, 0, - ROUTER_LATENCY); + router_latency); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -1438,19 +1311,19 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { for (int j = 0; j < MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; j++) { // add a link to the left of the router if there exists another router there if ((j - 1) >= 0) { - noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) - 1), LINK_BANDWIDTH, LINK_LATENCY); + noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) - 1), link_bandwidth, link_latency); } // add a link to the top of the router if there exists another router there if ((i + 1) <= MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST - 1) { - noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) + 
MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST), LINK_BANDWIDTH, LINK_LATENCY); + noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) + MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST), link_bandwidth, link_latency); } // add a link to the right of the router if there exists another router there if ((j + 1) <= MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST - 1) { - noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) + 1), LINK_BANDWIDTH, LINK_LATENCY); + noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) + 1), link_bandwidth, link_latency); } // add a link to the bottom of the router if there exists another router there if ((i - 1) >= 0) { - noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) - MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST), LINK_BANDWIDTH, LINK_LATENCY); + noc_ctx.noc_model.add_link((NocRouterId)((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j), (NocRouterId)(((i * MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST) + j) - MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST), link_bandwidth, link_latency); } } } @@ -1470,7 +1343,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); - // now add the cluster and its placed location to the placement datastructures + // now add the cluster and its placed location to the placement data structures block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } @@ -1480,7 +1353,8 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { int number_of_created_traffic_flows = 0; // now create 
a random number of traffic flows - for (int cluster_block_number = 0; cluster_block_number < NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST; cluster_block_number++) { + // now we want the last two router clusters to not have any traffic flows associated to them, so restrict this loop to all router clusters except the last two + for (int cluster_block_number = 0; cluster_block_number < NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST - 2; cluster_block_number++) { // the current cluster block number will act as the source router // and we will choose a random router to act as the sink router @@ -1488,6 +1362,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { ClusterBlockId sink_router_for_traffic_flow; // randomly choose sink router + // make sure the traffic flow does not start and end at the same router and also make sure that the sink router is never that last 2 router cluster blocks (we don't want them associated to any traffic flows) do { sink_router_for_traffic_flow = (ClusterBlockId)dist(rand_num_gen); } while (sink_router_for_traffic_flow == source_router_for_traffic_flow); @@ -1510,7 +1385,6 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { break; } } - noc_ctx.noc_traffic_flows_storage.finished_noc_traffic_flows_setup(); // now go and route all the traffic flows // @@ -1523,15 +1397,27 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { // store the traffic flow routes found vtr::vector> golden_traffic_flow_routes; golden_traffic_flow_routes.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + // store the traffic flow bandwidth costs and latency costs + vtr::vector golden_traffic_flow_bandwidth_costs; + vtr::vector golden_traffic_flow_latency_costs; + vtr::vector golden_traffic_flow_latency_overrun_costs; + golden_traffic_flow_bandwidth_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + 
golden_traffic_flow_latency_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + golden_traffic_flow_latency_overrun_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + // store link congestion costs + vtr::vector golden_link_congestion_costs; + golden_link_congestion_costs.resize(noc_ctx.noc_model.get_number_of_noc_links()); + + // stores the change in bandwidth and latency costs from the test function + NocCostTerms test_noc_costs; // we need to route all the traffic flows based on their initial positions - for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { + for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); // get the source and sink routers of this traffic flow int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id; int sink_hard_routed_id = (size_t)curr_traffic_flow.sink_router_cluster_id; - // route it routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, @@ -1540,57 +1426,151 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { noc_ctx.noc_model); } - const vtr::vector> initial_golden_traffic_flow_routes = golden_traffic_flow_routes; - + NocCostHandler noc_cost_handler(block_locs); // assume this works - // this is needed to set up the global noc packet router and also global datastructures - initial_noc_routing({}, block_locs); + // this is needed to set up the global noc packet router and also global data structures + noc_cost_handler.initial_noc_routing({}); - // datastructure below will store the bandwidth usages of all the links + // data structure below will store the bandwidth usages of all the links // and will be updated 
throughout this test. - // These link bandwidths will then be compared to link bandwidths in the NoC datastructure (these will be updated in the test function) + // These link bandwidths will then be compared to link bandwidths in the NoC data structure (these will be updated in the test function) vtr::vector golden_link_bandwidths; golden_link_bandwidths.resize(noc_ctx.noc_model.get_noc_links().size(), 0.0); // now store update the bandwidths used by all the links based on the initial traffic flow routes // also initialize the bandwidth and latency costs for all traffic flows // and sum them up to calculate the total initial aggregate bandwidth and latency costs for the NoC - for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { - const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); + for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { + const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; + const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - for (auto& link : golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number]) { + for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; } + + // calculate the bandwidth cost + golden_traffic_flow_bandwidth_costs[traffic_flow_id] = golden_traffic_flow_routes[traffic_flow_id].size() * curr_traffic_flow.traffic_flow_bandwidth; + golden_traffic_flow_bandwidth_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; + + double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow_id].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow_id].size()); + + 
golden_traffic_flow_latency_costs[traffic_flow_id] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow_id] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); + golden_traffic_flow_latency_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; + + test_noc_costs.aggregate_bandwidth += golden_traffic_flow_bandwidth_costs[traffic_flow_id]; + test_noc_costs.latency += golden_traffic_flow_latency_costs[traffic_flow_id]; + test_noc_costs.latency_overrun += golden_traffic_flow_latency_overrun_costs[traffic_flow_id]; } - /* The function that is tested here will basically assume all the blocks - * have been moved back to their original position before being swapped in - * placement. Then it will re-route the traffic flows with the blocks in - * their original placement, and update the traffic flow bandwidth. - * - * So for this test, we will first choose two random blocks to swap, then - * we will update the link bandwidths within the NoC to imitate this swap. - * Then we will call the test function to see whether it re-updates the link bandwidths to their original values. - */ + // initialize golden congestion cost for all links + for (const auto& link : noc_ctx.noc_model.get_noc_links()) { + auto link_id = link.get_link_id(); + golden_link_congestion_costs[link_id] = std::max(golden_link_bandwidths[link_id] - link_bandwidth, 0.0); + test_noc_costs.congestion += golden_link_congestion_costs[link_id]; + } + + // We need to run these functions as they initialize local variables needed to run the test function within this unit test. 
we assume this is correct + noc_cost_handler.comp_noc_aggregate_bandwidth_cost(); + noc_cost_handler.comp_noc_latency_cost(); + noc_cost_handler.comp_noc_congestion_cost(); - // datastructure that keeps track of moved blocks during placement + // data structure that keeps track of moved blocks during placement t_pl_blocks_to_be_moved blocks_affected(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST); - // datastructure that keeps track of all the traffic flows that have been re-routed + // data structure that keeps track of all the traffic flows that have been re-routed std::unordered_set routed_traffic_flows; - // get the two cluster blocks to swap first - ClusterBlockId swap_router_block_one = (ClusterBlockId)dist(rand_num_gen); - ClusterBlockId swap_router_block_two; - do { - swap_router_block_two = (ClusterBlockId)dist(rand_num_gen); - } while (swap_router_block_one == swap_router_block_two); + /* Now we imitate placement here by swapping two clusters block + * positions. In each iteration, we first update the positions + * of the two router cluster blocks, then we update the traffic + * flows and then the bandwidth usages of the links. Then we call + * the test function and then move onto the next iteration. 
+ */ + for (int iteration_number = 0; iteration_number < NUM_OF_PLACEMENT_MOVES_NOC_PLACE_UTILS_TEST; iteration_number++) { + // get the two cluster blocks to swap first + // make sure that the two router blocks are not the last two router cluster blocks and also aren't the same clusters themselves + ClusterBlockId swap_router_block_one = (ClusterBlockId)dist(rand_num_gen); + ClusterBlockId swap_router_block_two; + do { + swap_router_block_two = (ClusterBlockId)dist(rand_num_gen); + } while (swap_router_block_one == swap_router_block_two); + + //set up the moved blocks data structure for the test function + blocks_affected.moved_blocks.resize(2); + + blocks_affected.moved_blocks[0].block_num = swap_router_block_one; + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + + blocks_affected.moved_blocks[1].block_num = swap_router_block_two; + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; + + NocCostTerms delta_cost; + + // call the test function + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); + + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].old_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].old_loc; + + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.revert_noc_traffic_flow_routes(blocks_affected); + + // clear the affected blocks + blocks_affected.clear_move_blocks(); + + // clear the routed traffic flows + routed_traffic_flows.clear(); + } + + /* Now we will run a test where the two routers that are moved share a + * traffic flow with each other. 
This is used to verify whether the + * function under test correctly gets the positions of the moved blocks. + * It also checks that the test function correctly handles the situation + * where the moved router cluster block is a sink router in its associated traffic flows. + */ + // start by choosing a random traffic flow + NocTrafficFlowId random_traffic_flow = (NocTrafficFlowId)dist(rand_num_gen); + + // get the current traffic flow + const t_noc_traffic_flow& chosen_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(random_traffic_flow); + + // now swap the two blocks within this traffic flow + ClusterBlockId swap_router_block_one = chosen_traffic_flow.sink_router_cluster_id; + ClusterBlockId swap_router_block_two = chosen_traffic_flow.source_router_cluster_id; - //set up the moved blocks datastructure for the test function - // this is needed for the test function (it needs to know what blocks were swapped, so it can undo it) + // now perform the swap + //set up the moved blocks data structure for the test function blocks_affected.moved_blocks.resize(2); blocks_affected.moved_blocks[0].block_num = swap_router_block_one; + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1, @@ -1611,111 +1591,316 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on - // this is needed to that we can - NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; - router_where_cluster_is_placed[swap_router_block_one] = 
router_where_cluster_is_placed[swap_router_block_two]; - router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); - // get all the associated traffic flows of the moved cluster blocks - const std::vector& assoc_traffic_flows_block_one = noc_ctx.noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(swap_router_block_one); - const std::vector& assoc_traffic_flows_block_two = noc_ctx.noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(swap_router_block_two); + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; - // now go through the traffic flows and update the link bandwidths and traffic flow routes locally - for (auto traffic_flow_id : assoc_traffic_flows_block_one) { - if (routed_traffic_flows.find(traffic_flow_id) == routed_traffic_flows.end()) { - // get the current traffic flow - const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - - std::vector& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id); - traffic_flow_route.clear(); - - // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function) - // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks - for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { - // update the link bandwidth in the NoC datastructure - double current_link_bandwidth = noc_ctx.noc_model.get_single_noc_link(link).get_bandwidth_usage(); - 
noc_ctx.noc_model.get_single_mutable_noc_link(link).set_bandwidth_usage(current_link_bandwidth - curr_traffic_flow.traffic_flow_bandwidth); - } + NocCostTerms delta_cost; - // re-route the traffic flow - noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id], - router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], - traffic_flow_id, - golden_traffic_flow_routes[traffic_flow_id], - noc_ctx.noc_model); - - // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function) - // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks - for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { - // update the link bandwidth in the NoC datastructure - double current_link_bandwidth = noc_ctx.noc_model.get_single_noc_link(link).get_bandwidth_usage(); - noc_ctx.noc_model.get_single_mutable_noc_link(link).set_bandwidth_usage(current_link_bandwidth + curr_traffic_flow.traffic_flow_bandwidth); - traffic_flow_route.push_back(link); - } + // call the test function + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); - routed_traffic_flows.insert(traffic_flow_id); - } - } + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); - // now go through the traffic flows associated with the second swapped block - for (auto traffic_flow_id : assoc_traffic_flows_block_two) { - if (routed_traffic_flows.find(traffic_flow_id) == routed_traffic_flows.end()) { - // get the current traffic flow - const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - - std::vector& traffic_flow_route 
= noc_ctx.noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id); - traffic_flow_route.clear(); - - // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function) - // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks - for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { - // update the link bandwidth in the NoC datastructure - double current_link_bandwidth = noc_ctx.noc_model.get_single_noc_link(link).get_bandwidth_usage(); - noc_ctx.noc_model.get_single_mutable_noc_link(link).set_bandwidth_usage(current_link_bandwidth - curr_traffic_flow.traffic_flow_bandwidth); - } + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].old_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].old_loc; - // re-route the traffic flow - noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id], - router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], - traffic_flow_id, - golden_traffic_flow_routes[traffic_flow_id], - noc_ctx.noc_model); - - // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function) - // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks - for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { - // update the link bandwidth in the NoC datastructure - double current_link_bandwidth = noc_ctx.noc_model.get_single_noc_link(link).get_bandwidth_usage(); - noc_ctx.noc_model.get_single_mutable_noc_link(link).set_bandwidth_usage(current_link_bandwidth + 
curr_traffic_flow.traffic_flow_bandwidth); - traffic_flow_route.push_back(link); - } + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.revert_noc_traffic_flow_routes(blocks_affected); - routed_traffic_flows.insert(traffic_flow_id); - } - } + // clear the affected blocks + blocks_affected.clear_move_blocks(); + + /* Now we will run a test where one of the router clusters we will swap has no traffic flows associated with it. This will make sure whether the test + * function currently determines that a router cluster block has no traffic flows and also calculates that cost accordingly (cost of 0) + */ + // start by picking one of the router cluster blocks that don't have any traffic flows as one of our cluster blocks to swap + swap_router_block_one = (ClusterBlockId)(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST - 1); + // the second router block to swap will be one with a traffic flow associated to it + swap_router_block_two = (ClusterBlockId)(NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST - 4); + + // now perform the swap + //set up the moved blocks data structure for the test function + blocks_affected.moved_blocks.resize(2); + + blocks_affected.moved_blocks[0].block_num = swap_router_block_one; + + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + + blocks_affected.moved_blocks[1].block_num = swap_router_block_two; + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; + + // reset the delta costs + delta_cost = NocCostTerms(); + + // call the test function + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); + + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], 
router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].old_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].old_loc; + + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.revert_noc_traffic_flow_routes(blocks_affected); + + // clear the affected blocks + blocks_affected.clear_move_blocks(); + + /* + * Now we will run a test where both of the router clusters being swapped + * do not have traffic flows associated to them. This will make sure whether + * the test function currently determines that both router blocks have no + * traffic flows associated with them and calculates the cost change accordingly (total cost of 0) + */ + // start by picking one of the router cluster blocks that don't have any traffic flows as one of our cluster blocks to swap + swap_router_block_one = (ClusterBlockId)(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST - 1); + // the second router block to swap will be one with a traffic flow associated to it + swap_router_block_two = (ClusterBlockId)(NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST - 2); + + // now perform the swap + //set up the moved blocks data structure for the test function + blocks_affected.moved_blocks.resize(2); + + blocks_affected.moved_blocks[0].block_num = swap_router_block_one; + + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = 
t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); - // okay so now we undo the swapped blocks. We don't need to update the block locations in the placement datastructure since we initially never moved the blocks that were swapped at the start. - // To undo this we just need to update the noc link bandwidths as if there was no swap (we do this by calling the test function) - // This should then re-update the noc link bandwidths to their values before we imitated the swap above - // THe result is that the link bandwidths should match the golden link bandwidths that never changed after the initial router block placement (at a point before block swapping) - revert_noc_traffic_flow_routes(blocks_affected, block_locs); + blocks_affected.moved_blocks[1].block_num = swap_router_block_two; + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); - // now verify if the test function worked correctly by comparing the noc link bandwidths to the golden link bandwidths + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; + + // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows // + + // reset the delta costs + delta_cost = NocCostTerms(); + + // call the test function + noc_cost_handler.find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); + + // swap the hard router blocks where the two cluster blocks are placed on + std::swap(router_where_cluster_is_placed[swap_router_block_one], router_where_cluster_is_placed[swap_router_block_two]); + + // now move the blocks in the placement data structures + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].old_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].old_loc; + + // need this function to update the local data structures that store all the traffic flow costs + noc_cost_handler.revert_noc_traffic_flow_routes(blocks_affected); + + // clear the affected blocks + blocks_affected.clear_move_blocks(); + + // now verify the test function by comparing the link bandwidths in the noc model (should have been updated by the test function) to the golden set int number_of_links = golden_link_bandwidths.size(); for (int link_number = 0; link_number < number_of_links; link_number++) { NocLinkId current_link_id = (NocLinkId)link_number; 
const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id); + double golden_link_bandwidth = golden_link_bandwidths[current_link_id]; + double golden_link_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - link_bandwidth, 0.0); + double golden_link_congested_bandwidth_ratio = golden_link_congested_bandwidth / link_bandwidth; - REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage()); + REQUIRE(golden_link_bandwidth == noc_cost_handler.get_link_used_bandwidth(current_link_id)); + REQUIRE(golden_link_congested_bandwidth_ratio == noc_cost_handler.get_link_congestion_cost(current_link)); } - for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { - auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; - const auto& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); - const auto& golden_traffic_flow_route = initial_golden_traffic_flow_routes[traffic_flow_id]; - REQUIRE(traffic_flow_route == golden_traffic_flow_route); + // now find the total expected noc cost terms + double golden_total_noc_aggr_bandwidth_cost = 0.; + double golden_total_noc_latency_cost = 0.; + double golden_total_noc_latency_overrun_cost = 0.; + double golden_total_noc_congestion_cost = 0.; + + for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { + const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; + golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[traffic_flow_id]; + golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[traffic_flow_id]; + golden_total_noc_latency_overrun_cost += golden_traffic_flow_latency_overrun_costs[traffic_flow_id]; + } + + golden_total_noc_congestion_cost = std::accumulate(golden_link_congestion_costs.begin(), golden_link_congestion_costs.end(), 0.0); + + // now 
check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference) + REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); + REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); + REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); + REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); + + // now test the recompute cost function // + // The recompute cost function just adds up all traffic flow costs, so it match the expected noc costs that we manually calculated above by summing up all the expected individual traffic flow costs. // + + // start by resetting the test cost variables + test_noc_costs.aggregate_bandwidth = 0.; + test_noc_costs.latency = 0.; + test_noc_costs.latency_overrun = 0.; + test_noc_costs.congestion = 0.; + + // now execute the test function + test_noc_costs = noc_cost_handler.recompute_noc_costs(); + + // now verify + REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); + REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); + REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); + REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); +} + +TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") { + // creating local parameters needed for the test + t_placer_costs costs; + t_placer_opts placer_opts; + + SECTION("Test case where the bandwidth cost is 0") { + costs.noc_cost_terms.aggregate_bandwidth = 0.; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the aggregate bandwidth normalized 
cost + // this should not be +INF and instead trimmed + REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.0); + } + SECTION("Test case where the latency cost is 0") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 0.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the latency normalized cost + // this should not be +INF and instead trimmed + REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12); + } + SECTION("Test case where the bandwidth cost is an expected value") { + costs.noc_cost_terms.aggregate_bandwidth = 1.e9; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the aggregate bandwidth normalized cost + // this should not be trimmed + REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.e-9); + } + SECTION("Test case where the latency cost is an expected value") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 50.e-12; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the latency normalized cost + // this should not be trimmed + REQUIRE(costs.noc_cost_norm_factors.latency == 2.e10); + } + SECTION("Test case where the latency cost is lower than the smallest expected value") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 999.e-15; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the latency normalized cost + // this should not be trimmed + 
REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12); + } + SECTION("Test case where the congestion cost is zero") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 0.; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the congestion normalization factor + // this should not be infinite + REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3); + } + SECTION("Test case where the congestion cost is lower than the smallest expected value") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 999.e-15; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the congestion normalization factor + // this should not be infinite + REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3); + } + SECTION("Test case where the congestion cost is an expected value") { + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.e2; + + // run the test function + NocCostHandler::update_noc_normalization_factors(costs); + + // verify the congestion normalization factor + REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e-2); } } + TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { // setup random number generation std::random_device device; @@ -1729,12 +1914,12 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { std::uniform_real_distribution dist_3(1, 25); std::default_random_engine double_engine; - // get global datastructures + // get global data structures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& block_locs = place_ctx.mutable_block_locs(); - // start 
by deleting any global datastructures (this is so that we don't have corruption from previous tests) + // start by deleting any global data structures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); block_locs.clear(); @@ -1807,11 +1992,6 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { } } - // initialize NoC link bandwidth usage - for (auto& noc_link : noc_ctx.noc_model.get_mutable_noc_links()) { - noc_link.set_bandwidth_usage(0.0); - } - // now we need to create router cluster blocks and passing them to placed at a router hard block as an initial position for (int cluster_block_number = 0; cluster_block_number < NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST; cluster_block_number++) { // since the indexes for the hard router blocks start from 0, we will just place the router clusters on hard router blocks with the same id // @@ -1827,7 +2007,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); - // now add the cluster and its placed location to the placement datastructures + // now add the cluster and its placed location to the placement data structures block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } @@ -1881,6 +2061,8 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { vtr::vector> golden_traffic_flow_routes; golden_traffic_flow_routes.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + vtr::vector golden_link_bandwidth_usage(noc_ctx.noc_model.get_number_of_noc_links(), 0.); + // we need to route all the traffic flows based on their initial positions for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; @@ -1902,11 +2084,8 
@@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { noc_ctx.noc_model); // update link bandwidth utilization - for (auto link_id : traffic_flow_route) { - auto& noc_link = noc_ctx.noc_model.get_single_mutable_noc_link(link_id); - double curr_link_bw_util = noc_link.get_bandwidth_usage(); - curr_link_bw_util += traffic_flow_bandwidth; - noc_link.set_bandwidth_usage(curr_link_bw_util); + for (const NocLinkId link_id : traffic_flow_route) { + golden_link_bandwidth_usage[link_id] += traffic_flow_bandwidth; } } @@ -1947,8 +2126,9 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { // calculate the congestion cost for (const auto& noc_link : noc_ctx.noc_model.get_noc_links()) { - double curr_congestion_cost = noc_link.get_congested_bandwidth_ratio(); - costs.noc_cost_terms.congestion += curr_congestion_cost; + double bw = noc_link.get_bandwidth(); + double congested_bw = std::max(golden_link_bandwidth_usage[noc_link.get_link_id()] - bw, 0.); + costs.noc_cost_terms.congestion += congested_bw / bw; } // this defines the error tolerance that is allowed between the golden noc costs and the costs found by the test function: check_noc_placement_costs @@ -1956,30 +2136,33 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { double error_tolerance = .01; SECTION("Case where check place works after initial placement") { + NocCostHandler noc_cost_handler(block_locs); // run the test function - int error = check_noc_placement_costs(costs, error_tolerance, noc_opts, block_locs); + int error = noc_cost_handler.check_noc_placement_costs(costs, error_tolerance, noc_opts); // we expect error to be 0 here, meaning the found costs are within the error tolerance of the noc golden costs REQUIRE(error == 0); } SECTION("Case where the check place fails for both NoC costs") { + NocCostHandler noc_cost_handler(block_locs); + // we need to make the aggregate bandwidth cost and latency cost be a value that is larger or smaller than the 
tolerance value costs.noc_cost_terms.aggregate_bandwidth += (costs.noc_cost_terms.aggregate_bandwidth * error_tolerance * 2); costs.noc_cost_terms.latency -= (costs.noc_cost_terms.latency * error_tolerance * 2); if (costs.noc_cost_terms.latency_overrun == 0) { - costs.noc_cost_terms.latency_overrun += MIN_EXPECTED_NOC_LATENCY_COST * error_tolerance * 2; + costs.noc_cost_terms.latency_overrun += NocCostHandler::MIN_EXPECTED_NOC_LATENCY_COST * error_tolerance * 2; } else { costs.noc_cost_terms.latency_overrun += costs.noc_cost_terms.latency_overrun * error_tolerance * 2; } if (costs.noc_cost_terms.congestion == 0) { - costs.noc_cost_terms.congestion += MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2; + costs.noc_cost_terms.congestion += NocCostHandler::MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2; } else { costs.noc_cost_terms.congestion += costs.noc_cost_terms.congestion * error_tolerance * 2; } // run the test function - int error = check_noc_placement_costs(costs, error_tolerance, noc_opts, block_locs); + int error = noc_cost_handler.check_noc_placement_costs(costs, error_tolerance, noc_opts); // we expect error to be 4 here, meaning the found costs are not within the tolerance range REQUIRE(error == 4); diff --git a/vpr/test/test_noc_storage.cpp b/vpr/test/test_noc_storage.cpp index 2f24b4a6542..0320fa130dc 100644 --- a/vpr/test/test_noc_storage.cpp +++ b/vpr/test/test_noc_storage.cpp @@ -30,7 +30,7 @@ TEST_CASE("test_adding_routers_to_noc_storage", "[vpr_noc]") { int router_grid_position_x; int router_grid_position_y; - // testing datastructure + // testing data structure NocStorage test_noc; // store the reference to device grid with @@ -82,7 +82,7 @@ TEST_CASE("test_router_id_conversion", "[vpr_noc]") { int router_grid_position_x; int router_grid_position_y; - // testing datastructure + // testing data structure NocStorage test_noc; // store the reference to device grid with @@ -129,7 +129,7 @@ TEST_CASE("test_add_link", "[vpr_noc]") { 
NocLinkId link_id; - // testing datastructure + // testing data structure NocStorage test_noc; // store the reference to device grid with @@ -215,7 +215,7 @@ TEST_CASE("test_router_link_list", "[vpr_noc]") { NocRouterId source; NocRouterId sink; - // testing datastructure + // testing data structure NocStorage test_noc; // store the reference to device grid with @@ -414,7 +414,7 @@ TEST_CASE("test_generate_router_key_from_grid_location", "[vpr_noc]") { int router_grid_position_x; int router_grid_position_y; - // testing datastructure + // testing data structure NocStorage test_noc; // store the reference to device grid with diff --git a/vpr/test/test_noc_traffic_flows.cpp b/vpr/test/test_noc_traffic_flows.cpp index 1ee20f0aefc..1afd37727e6 100644 --- a/vpr/test/test_noc_traffic_flows.cpp +++ b/vpr/test/test_noc_traffic_flows.cpp @@ -9,7 +9,7 @@ namespace { TEST_CASE("test_adding_traffic_flows", "[vpr_noc_traffic_flows]") { - // the traffic flows datastructure and reset it + // the traffic flows data structure and reset it NocTrafficFlows traffic_flow_storage; traffic_flow_storage.clear_traffic_flows(); @@ -61,7 +61,7 @@ TEST_CASE("test_adding_traffic_flows", "[vpr_noc_traffic_flows]") { // finished setting up all the golden information, so now perform the tests SECTION("Verifying that all created traffic flows and their related information are stored correctly.") { - // add all the traffic flows to the datastructure + // add all the traffic flows to the data structure for (int router = 0; router < NUM_OF_ROUTERS; router++) { for (int second_router = 0; second_router < NUM_OF_ROUTERS; second_router++) { // don't want the case where the source and destination routers are the same @@ -81,7 +81,7 @@ TEST_CASE("test_adding_traffic_flows", "[vpr_noc_traffic_flows]") { // check the set of routers first to see that they were all added properly for (int router = 0; router < size_of_router_block_list; router++) { - // every router in the golden list needs to exist in the 
traffic flow datastructure (this also tests cases where a router was added multiple times, this shouldn't affect it) + // every router in the golden list needs to exist in the traffic flow data structure (this also tests cases where a router was added multiple times, this shouldn't affect it) REQUIRE(traffic_flow_storage.check_if_cluster_block_has_traffic_flows(golden_router_blocks_list[router]) == true); } @@ -103,7 +103,7 @@ TEST_CASE("test_adding_traffic_flows", "[vpr_noc_traffic_flows]") { int number_of_traffic_flows_associated_with_current_router = golden_list_of_associated_traffic_flows_to_routers[router_id].size(); - // get the traffic flows associated to the current router from the test datastructure + // get the traffic flows associated to the current router from the test data structure const std::vector& associated_traffic_flows_to_router = traffic_flow_storage.get_traffic_flows_associated_to_router_block(router_id); // make sure that the number of traffic flows associated to each router within the NocTrafficFlows data structure matches the golden set @@ -122,7 +122,7 @@ TEST_CASE("test_adding_traffic_flows", "[vpr_noc_traffic_flows]") { // create an invalid block id ClusterBlockId invalid_block = (ClusterBlockId)(NUM_OF_ROUTERS + 1); - // check that this block doesn't exist in the traffic flow datastructure + // check that this block doesn't exist in the traffic flow data structure REQUIRE(traffic_flow_storage.check_if_cluster_block_has_traffic_flows(invalid_block) == false); } SECTION("Checking that when a router has no traffic flows associated to it, then the associated traffic flows vector retrieved from the NocTrafficFlows class for this router should be null.") { diff --git a/vpr/test/test_read_xml_noc_traffic_flows_file.cpp b/vpr/test/test_read_xml_noc_traffic_flows_file.cpp index 8ea4c8e2cae..f16a5d9feff 100644 --- a/vpr/test/test_read_xml_noc_traffic_flows_file.cpp +++ b/vpr/test/test_read_xml_noc_traffic_flows_file.cpp @@ -114,7 +114,7 @@ 
TEST_CASE("test_get_router_module_cluster_id", "[vpr_noc_traffic_flows_parser]") pugi::xml_node test; pugiutil::loc_data test_location; - // datastructure to keep track of blocks name to its id + // data structure to keep track of blocks name to its id std::map block_id_from_name; // get the global netlist @@ -173,7 +173,7 @@ TEST_CASE("test_get_router_module_cluster_id", "[vpr_noc_traffic_flows_parser]") block_id_from_name.emplace(io_port_three, test_netlist->create_block(io_port_three, nullptr, i_o_ref)); block_id_from_name.emplace(io_port_four, test_netlist->create_block(io_port_four, nullptr, i_o_ref)); - // datastructure to store all the cluster block IDs of the noc router logical block type clusters + // data structure to store all the cluster block IDs of the noc router logical block type clusters std::vector noc_router_logical_type_clusters; SECTION("Test case where the block is found in the clustered netlist") { @@ -215,7 +215,7 @@ TEST_CASE("test_get_router_module_cluster_id", "[vpr_noc_traffic_flows_parser]") REQUIRE((size_t)(block_id_from_name.find("router:noc_router_five|flit_out_two[0]~reg0")->second) == (size_t)test_router_block_id); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } SECTION("Test case where the block is not found in the clustered netlist") { @@ -255,7 +255,7 @@ TEST_CASE("test_get_router_module_cluster_id", "[vpr_noc_traffic_flows_parser]") // This should fail, so check that it does REQUIRE_THROWS_WITH(get_router_module_cluster_id(test_router_module_name, cluster_ctx, test, test_location, noc_router_logical_type_clusters), "The router module '^router:noc_router_seven|flit_out_two[0]~reg0$' does not exist in the design."); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the 
global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } } @@ -316,7 +316,7 @@ TEST_CASE("test_check_traffic_flow_router_module_type", "[vpr_noc_traffic_flows_ // the function should not fail since the module is a router REQUIRE_NOTHROW(check_traffic_flow_router_module_type(router_one, router_module_id, test, test_location, cluster_ctx, noc_router_ref)); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } SECTION("Test case where the traffic flow module is not of type router") { @@ -330,7 +330,7 @@ TEST_CASE("test_check_traffic_flow_router_module_type", "[vpr_noc_traffic_flows_ // the function should faile since the module is of type IO REQUIRE_THROWS_WITH(check_traffic_flow_router_module_type(io_block_one, io_module_id, test, test_location, cluster_ctx, noc_router_ref), "The supplied module name 'io_block_one' is not a NoC router."); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } } @@ -414,7 +414,7 @@ TEST_CASE("test_check_that_all_router_blocks_have_an_associated_traffic_flow", " // we expect this to pass CHECK(check_that_all_router_blocks_have_an_associated_traffic_flow(noc_ctx, noc_router_ref, test_noc_traffic_flows_file_name) == true); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); // clear the global device @@ -430,7 +430,7 @@ TEST_CASE("test_check_that_all_router_blocks_have_an_associated_traffic_flow", " // we expect this fail 
CHECK(check_that_all_router_blocks_have_an_associated_traffic_flow(noc_ctx, noc_router_ref, test_noc_traffic_flows_file_name) == false); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); // clear the global device @@ -523,7 +523,7 @@ TEST_CASE("test_get_cluster_blocks_compatible_with_noc_router_tiles", "[vpr_noc_ REQUIRE(std::find(found_cluster_blocks_that_are_noc_router_compatible.begin(), found_cluster_blocks_that_are_noc_router_compatible.end(), *golden_set_router_block_id) != found_cluster_blocks_that_are_noc_router_compatible.end()); } - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } SECTION("Test case where non router blocks are correctly identified within the netlist and ignored.") { @@ -549,7 +549,7 @@ TEST_CASE("test_get_cluster_blocks_compatible_with_noc_router_tiles", "[vpr_noc_ // since there were no router blocks in this netlist, check that the test found function 0 blocks that were compatible with a noc router tile REQUIRE(found_cluster_blocks_that_are_noc_router_compatible.size() == 0); - // clear the global netlist datastructure so other unit tests that rely on dont use a corrupted netlist + // clear the global netlist data structure so other unit tests that rely on dont use a corrupted netlist free_clustered_netlist(); } } diff --git a/vpr/test/test_setup_noc.cpp b/vpr/test/test_setup_noc.cpp index 2354e9562d3..d1c14c211a8 100644 --- a/vpr/test/test_setup_noc.cpp +++ b/vpr/test/test_setup_noc.cpp @@ -397,7 +397,7 @@ TEST_CASE("test_identify_and_store_noc_router_tile_positions", "[vpr_setup_noc]" } } TEST_CASE("test_create_noc_routers", "[vpr_setup_noc]") { - // datastructure 
to hold the list of physical tiles + // data structure to hold the list of physical tiles vtr::vector list_of_routers; /* @@ -583,7 +583,7 @@ TEST_CASE("test_create_noc_routers", "[vpr_setup_noc]") { } } TEST_CASE("test_create_noc_links", "[vpr_setup_noc]") { - // datastructure to hold the list of physical tiles + // data structure to hold the list of physical tiles std::vector list_of_routers; /* @@ -725,7 +725,7 @@ TEST_CASE("test_setup_noc", "[vpr_setup_noc]") { // this is similar to the user provided a config file temp_router = new t_router; - // datastructure to hold the list of physical tiles + // data structure to hold the list of physical tiles std::vector list_of_routers; // get a mutable to the device context diff --git a/vpr/test/test_xy_routing.cpp b/vpr/test/test_xy_routing.cpp index 18f45f1231c..8b79960c4c8 100644 --- a/vpr/test/test_xy_routing.cpp +++ b/vpr/test/test_xy_routing.cpp @@ -46,7 +46,7 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") { * */ - // Create the NoC datastructure + // Create the NoC data structure NocStorage noc_model; // store the reference to device grid with @@ -238,7 +238,7 @@ TEST_CASE("test_route_flow when it fails in a mesh topology.", "[vpr_noc_xy_rout * */ - // Create the NoC datastructure + // Create the NoC data structure NocStorage noc_model; // store the reference to device grid with @@ -353,7 +353,7 @@ TEST_CASE("test_route_flow when it fails in a non mesh topology.", "[vpr_noc_xy_ * 0------------3 */ - // Create the NoC datastructure + // Create the NoC data structure NocStorage noc_model; // store the reference to device grid with diff --git a/vtr_flow/scripts/noc/noc_benchmark_test.py b/vtr_flow/scripts/noc/noc_benchmark_test.py index b47e5c1cd6b..54f045962b3 100755 --- a/vtr_flow/scripts/noc/noc_benchmark_test.py +++ b/vtr_flow/scripts/noc/noc_benchmark_test.py @@ -696,7 +696,7 @@ def execute_vpr_and_process_output(vpr_command_list, num_of_seeds, num_of_thread args = 
noc_test_command_line_parser().parse_args(sys.argv[1:]) """ - local datastructures that store the design file locations, their + local data structures that store the design file locations, their corresponding noc traffic flow files and the design name """ design_files_in_dir = []