diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingReporter.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingReporter.cpp
index 69c9ba07a83..69e8e4bbd18 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingReporter.cpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingReporter.cpp
@@ -606,7 +606,7 @@ Time TimingReporter::report_timing_data_arrival_subpath(std::ostream& os,
 {
     //Input constraint
-    TATUM_ASSERT(subpath.elements().size() > 0);
+    TATUM_ASSERT(!subpath.elements().empty());
     const TimingPathElem& path_elem = *(subpath.elements().begin());
 
     Time input_constraint;
@@ -712,7 +712,7 @@ bool TimingReporter::nearly_equal(const Time& lhs, const Time& rhs) const {
 size_t TimingReporter::estimate_point_print_width(const TimingPath& path) const {
     size_t width = 60; //default
 
-    for(auto subpath : {path.clock_launch_path(), path.data_arrival_path(), path.clock_capture_path()}) {
+    for(const auto& subpath : {path.clock_launch_path(), path.data_arrival_path(), path.clock_capture_path()}) {
         for(auto elem : subpath.elements()) {
             //Take the longest typical point name
             std::string point = name_resolver_.node_name(elem.node()) + " (" + name_resolver_.node_type_name(elem.node()) + ")";
diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp
index 5a1f314d331..9968473eb64 100644
--- a/utils/fasm/src/fasm.cpp
+++ b/utils/fasm/src/fasm.cpp
@@ -37,9 +37,9 @@ void FasmWriterVisitor::visit_top_impl(const char* top_level_name) {
 }
 
 void FasmWriterVisitor::visit_clb_impl(ClusterBlockId blk_id, const t_pb* clb) {
-  auto& place_ctx = g_vpr_ctx.placement();
   auto& device_ctx = g_vpr_ctx.device();
   auto& cluster_ctx = g_vpr_ctx.clustering();
+  auto& block_locs = g_vpr_ctx.placement().block_locs();
 
   current_blk_id_ = blk_id;
 
@@ -48,10 +48,10 @@ void FasmWriterVisitor::visit_clb_impl(ClusterBlockId blk_id, const t_pb* clb) {
 
   root_clb_ = clb->pb_graph_node;
 
-  int x = place_ctx.block_locs[blk_id].loc.x;
-  int y = place_ctx.block_locs[blk_id].loc.y;
-  int layer_num = place_ctx.block_locs[blk_id].loc.layer;
-  int sub_tile = place_ctx.block_locs[blk_id].loc.sub_tile;
+  int x = block_locs[blk_id].loc.x;
+  int y = block_locs[blk_id].loc.y;
+  int layer_num = block_locs[blk_id].loc.layer;
+  int sub_tile = block_locs[blk_id].loc.sub_tile;
   physical_tile_ = device_ctx.grid.get_physical_type({x, y, layer_num});
   logical_block_ = cluster_ctx.clb_nlist.block_type(blk_id);
   const auto& grid_meta = device_ctx.grid.get_metadata({x, y, layer_num});
diff --git a/utils/fasm/test/test_fasm.cpp b/utils/fasm/test/test_fasm.cpp
index b700211825f..31c358d9d17 100644
--- a/utils/fasm/test/test_fasm.cpp
+++ b/utils/fasm/test/test_fasm.cpp
@@ -569,7 +569,7 @@ TEST_CASE("fasm_integration_test", "[fasm]") {
 
     // Verify occupied grid LOCs
     const auto & place_ctx = g_vpr_ctx.placement();
-    for (const auto& loc: place_ctx.block_locs) {
+    for (const auto& loc: place_ctx.block_locs()) {
 
         // Do not consider "IOB" tiles. They do not have fasm features
         // defined in the arch.
diff --git a/vpr/src/analysis/timing_reports.cpp b/vpr/src/analysis/timing_reports.cpp
index 88762367a95..7ff47a81cf9 100644
--- a/vpr/src/analysis/timing_reports.cpp
+++ b/vpr/src/analysis/timing_reports.cpp
@@ -12,13 +12,18 @@
 
 #include "VprTimingGraphResolver.h"
 
-void generate_setup_timing_stats(const std::string& prefix, const SetupTimingInfo& timing_info, const AnalysisDelayCalculator& delay_calc, const t_analysis_opts& analysis_opts, bool is_flat) {
+void generate_setup_timing_stats(const std::string& prefix,
+                                 const SetupTimingInfo& timing_info,
+                                 const AnalysisDelayCalculator& delay_calc,
+                                 const t_analysis_opts& analysis_opts,
+                                 bool is_flat,
+                                 const BlkLocRegistry& blk_loc_registry) {
     auto& timing_ctx = g_vpr_ctx.timing();
     auto& atom_ctx = g_vpr_ctx.atom();
 
     print_setup_timing_summary(*timing_ctx.constraints, *timing_info.setup_analyzer(), "Final ", analysis_opts.write_timing_summary);
 
-    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat);
+    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat, blk_loc_registry);
     resolver.set_detail_level(analysis_opts.timing_report_detail);
 
     tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints);
@@ -32,13 +37,18 @@ void generate_setup_timing_stats(const std::string& prefix, const SetupTimingInf
     timing_reporter.report_unconstrained_setup(prefix + "report_unconstrained_timing.setup.rpt", *timing_info.setup_analyzer());
 }
 
-void generate_hold_timing_stats(const std::string& prefix, const HoldTimingInfo& timing_info, const AnalysisDelayCalculator& delay_calc, const t_analysis_opts& analysis_opts, bool is_flat) {
+void generate_hold_timing_stats(const std::string& prefix,
+                                const HoldTimingInfo& timing_info,
+                                const AnalysisDelayCalculator& delay_calc,
+                                const t_analysis_opts& analysis_opts,
+                                bool is_flat,
+                                const BlkLocRegistry& blk_loc_registry) {
     auto& timing_ctx = g_vpr_ctx.timing();
     auto& atom_ctx = g_vpr_ctx.atom();
 
     print_hold_timing_summary(*timing_ctx.constraints, *timing_info.hold_analyzer(), "Final ");
 
-    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat);
+    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat, blk_loc_registry);
     resolver.set_detail_level(analysis_opts.timing_report_detail);
 
     tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints);
diff --git a/vpr/src/analysis/timing_reports.h b/vpr/src/analysis/timing_reports.h
index f09f20127e5..72e1013dece 100644
--- a/vpr/src/analysis/timing_reports.h
+++ b/vpr/src/analysis/timing_reports.h
@@ -5,7 +5,20 @@
 #include "AnalysisDelayCalculator.h"
 #include "vpr_types.h"
 
-void generate_setup_timing_stats(const std::string& prefix, const SetupTimingInfo& timing_info, const AnalysisDelayCalculator& delay_calc, const t_analysis_opts& report_detail, bool is_flat);
-void generate_hold_timing_stats(const std::string& prefix, const HoldTimingInfo& timing_info, const AnalysisDelayCalculator& delay_calc, const t_analysis_opts& report_detail, bool is_flat);
+class BlkLocRegistry;
+
+void generate_setup_timing_stats(const std::string& prefix,
+                                 const SetupTimingInfo& timing_info,
+                                 const AnalysisDelayCalculator& delay_calc,
+                                 const t_analysis_opts& report_detail,
+                                 bool is_flat,
+                                 const BlkLocRegistry& blk_loc_registry);
+
+void generate_hold_timing_stats(const std::string& prefix,
+                                const HoldTimingInfo& timing_info,
+                                const AnalysisDelayCalculator& delay_calc,
+                                const t_analysis_opts& report_detail,
+                                bool is_flat,
+                                const BlkLocRegistry& blk_loc_registry);
 
 #endif
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index cdf51112e46..b9e32702b5d 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -527,10 +527,10 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
 
     VTR_LOG("PlacerOpts.pad_loc_type: ");
     switch (PlacerOpts.pad_loc_type) {
-        case FREE:
+        case e_pad_loc_type::FREE:
             VTR_LOG("FREE\n");
             break;
-        case RANDOM:
+        case e_pad_loc_type::RANDOM:
             VTR_LOG("RANDOM\n");
             break;
         default:
diff --git a/vpr/src/base/blk_loc_registry.cpp b/vpr/src/base/blk_loc_registry.cpp
new file mode 100644
index 00000000000..b8a6348dc1c
--- /dev/null
+++ b/vpr/src/base/blk_loc_registry.cpp
@@ -0,0 +1,114 @@
+
+#include "blk_loc_registry.h"
+#include "globals.h"
+
+const vtr::vector_map<ClusterBlockId, t_block_loc>& BlkLocRegistry::block_locs() const {
+    return block_locs_;
+}
+
+vtr::vector_map<ClusterBlockId, t_block_loc>& BlkLocRegistry::mutable_block_locs() {
+    return block_locs_;
+}
+
+const GridBlock& BlkLocRegistry::grid_blocks() const {
+    return grid_blocks_;
+}
+
+GridBlock& BlkLocRegistry::mutable_grid_blocks() {
+    return grid_blocks_;
+}
+
+const vtr::vector_map<ClusterPinId, int>& BlkLocRegistry::physical_pins() const {
+    return physical_pins_;
+}
+
+vtr::vector_map<ClusterPinId, int>& BlkLocRegistry::mutable_physical_pins() {
+    return physical_pins_;
+}
+
+int BlkLocRegistry::tile_pin_index(const ClusterPinId pin) const {
+    return physical_pins_[pin];
+}
+
+int BlkLocRegistry::net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index) const {
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    // Get the logical pin index of pin within its logical block type
+    ClusterPinId pin_id = cluster_ctx.clb_nlist.net_pin(net_id, net_pin_index);
+
+    return this->tile_pin_index(pin_id);
+}
+
+void BlkLocRegistry::set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) {
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    const std::string& block_name = cluster_ctx.clb_nlist.block_name(blk_id);
+
+    //Check if block location is out of range of grid dimensions
+    if (location.x < 0 || location.x > int(device_ctx.grid.width() - 1)
+        || location.y < 0 || location.y > int(device_ctx.grid.height() - 1)) {
+        VPR_THROW(VPR_ERROR_PLACE, "Block %s with ID %d is out of range at location (%d, %d). \n",
+                  block_name.c_str(), blk_id, location.x, location.y);
+    }
+
+    //Set the location of the block
+    block_locs_[blk_id].loc = location;
+
+    //Check if block is at an illegal location
+    auto physical_tile = device_ctx.grid.get_physical_type({location.x, location.y, location.layer});
+    auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id);
+
+    if (location.sub_tile >= physical_tile->capacity || location.sub_tile < 0) {
+        VPR_THROW(VPR_ERROR_PLACE, "Block %s subtile number (%d) is out of range. \n", block_name.c_str(), location.sub_tile);
+    }
+
+    if (!is_sub_tile_compatible(physical_tile, logical_block, block_locs_[blk_id].loc.sub_tile)) {
+        VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d,%d,%d). \n",
+                  block_name.c_str(),
+                  blk_id,
+                  location.x,
+                  location.y,
+                  location.layer);
+    }
+
+    //Mark the grid location and usage of the block
+    grid_blocks_.set_block_at_location(location, blk_id);
+    grid_blocks_.increment_usage({location.x, location.y, location.layer});
+
+    place_sync_external_block_connections(blk_id);
+}
+
+void BlkLocRegistry::place_sync_external_block_connections(ClusterBlockId iblk) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    t_pl_loc block_loc = block_locs_[iblk].loc;
+
+    auto physical_tile = physical_tile_type(block_loc);
+    auto logical_block = clb_nlist.block_type(iblk);
+
+    int sub_tile_index = get_sub_tile_index(iblk, block_locs_);
+    auto sub_tile = physical_tile->sub_tiles[sub_tile_index];
+
+    VTR_ASSERT(sub_tile.num_phy_pins % sub_tile.capacity.total() == 0);
+
+    int max_num_block_pins = sub_tile.num_phy_pins / sub_tile.capacity.total();
+    /* Logical location and physical location is offset by z * max_num_block_pins */
+
+    int rel_capacity = block_loc.sub_tile - sub_tile.capacity.low;
+
+    for (ClusterPinId pin : clb_nlist.block_pins(iblk)) {
+        int logical_pin_index = clb_nlist.pin_logical_index(pin);
+        int sub_tile_pin_index = get_sub_tile_physical_pin(sub_tile_index, physical_tile, logical_block, logical_pin_index);
+
+        int new_physical_pin_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index + rel_capacity * max_num_block_pins];
+
+        auto result = physical_pins_.find(pin);
+        if (result != physical_pins_.end()) {
+            physical_pins_[pin] = new_physical_pin_index;
+        } else {
+            physical_pins_.insert(pin, new_physical_pin_index);
+        }
+    }
+}
diff --git a/vpr/src/base/blk_loc_registry.h b/vpr/src/base/blk_loc_registry.h
new file mode 100644
index 00000000000..99567d17e0a
--- /dev/null
+++ b/vpr/src/base/blk_loc_registry.h
@@ -0,0 +1,85 @@
+#ifndef VTR_BLK_LOC_REGISTRY_H
+#define VTR_BLK_LOC_REGISTRY_H
+
+#include "clustered_netlist_fwd.h"
+#include "vtr_vector_map.h"
+#include "vpr_types.h"
+#include "grid_block.h"
+
+struct t_block_loc;
+
+/**
+ * @class BlkLocRegistry contains information about the placement of clustered blocks.
+ * More specifically:
+ *   1) block_locs stores the location where each clustered block is placed.
+ *   2) grid_blocks stores which blocks (if any) are placed at a given location.
+ *   3) physical_pins stores the mapping between the pins of a clustered block and
+ *      the pins of the physical tile where the clustered block is placed.
+ */
+class BlkLocRegistry {
+  public:
+    BlkLocRegistry() = default;
+    ~BlkLocRegistry() = default;
+    BlkLocRegistry(const BlkLocRegistry&) = delete;
+    BlkLocRegistry& operator=(const BlkLocRegistry&) = default;
+    BlkLocRegistry(BlkLocRegistry&&) = delete;
+    BlkLocRegistry& operator=(BlkLocRegistry&&) = delete;
+
+  private:
+    ///@brief Clustered block placement locations
+    vtr::vector_map<ClusterBlockId, t_block_loc> block_locs_;
+
+    ///@brief Clustered block associated with each grid location (i.e. inverse of block_locs)
+    GridBlock grid_blocks_;
+
+    ///@brief Clustered pin placement mapping with physical pin
+    vtr::vector_map<ClusterPinId, int> physical_pins_;
+
+  public:
+    const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs() const;
+    vtr::vector_map<ClusterBlockId, t_block_loc>& mutable_block_locs();
+
+    const GridBlock& grid_blocks() const;
+    GridBlock& mutable_grid_blocks();
+
+    const vtr::vector_map<ClusterPinId, int>& physical_pins() const;
+    vtr::vector_map<ClusterPinId, int>& mutable_physical_pins();
+
+    ///@brief Returns the physical pin of the tile, related to the given ClusterPinId
+    int tile_pin_index(const ClusterPinId pin) const;
+
+    ///@brief Returns the physical pin of the tile, related to the given ClusterNetId, and the net pin index.
+    int net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index) const;
+
+    /**
+     * @brief Performs error checking to see if location is legal for block type,
+     * and sets the location and grid usage of the block if it is legal.
+     * @param blk_id The unique ID of the clustered block whose location is to be set.
+     * @param location The location where the clustered block should be placed.
+     */
+    void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location);
+
+    /**
+     * @brief Syncs the logical block pins corresponding to the input iblk with the corresponding chosen physical tile
+     * @param iblk cluster block ID to sync within the assigned physical tile
+     *
+     * This routine updates the physical pins vector of the place context after the placement step
+     * to synchronize the pins related to the logical block with the actual connection interface of
+     * the corresponding physical tile in the RR graph.
+     *
+     * This step is required as the logical block can be placed at any compatible sub tile location
+     * within a physical tile.
+     * Given that it is possible to have equivalent logical blocks within a specific sub tile, with
+     * a different subset of IO pins, the various pin offsets must be correctly computed and assigned
+     * to the physical pins vector, so that, when the net RR terminals are computed, the correct physical
+     * tile IO pins are selected.
+     *
+     * This routine uses the x, y, and sub_tile coordinates of the clb netlist, and expects those to place
+     * each netlist block at a legal location that can accommodate it.
+     * It does not check for overuse of locations, so it can be used with placements that have resource overuse.
+     */
+    void place_sync_external_block_connections(ClusterBlockId iblk);
+};
+
+#endif //VTR_BLK_LOC_REGISTRY_H
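The header above pairs every const getter with an explicit mutable_*() variant instead of exposing members. A minimal sketch of that accessor discipline, with illustrative names only:

#include <cstdio>
#include <vector>

// Sketch of the paired const/mutable accessor pattern used by BlkLocRegistry:
// read paths take the const getter; the few writers must ask for the
// mutable_*() variant, which makes mutation sites easy to grep for.
class Registry {
  public:
    const std::vector<int>& values() const { return values_; }
    std::vector<int>& mutable_values() { return values_; }

  private:
    std::vector<int> values_{1, 2, 3};
};

int main() {
    Registry reg;
    reg.mutable_values().push_back(4);                 // explicit, searchable mutation
    for (int v : reg.values()) std::printf("%d ", v);  // read-only access
    std::printf("\n");
    return 0;
}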
diff --git a/vpr/src/base/grid_block.cpp b/vpr/src/base/grid_block.cpp
new file mode 100644
index 00000000000..9053830473e
--- /dev/null
+++ b/vpr/src/base/grid_block.cpp
@@ -0,0 +1,59 @@
+
+#include "grid_block.h"
+
+#include "globals.h"
+
+void GridBlock::zero_initialize() {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    /* Initialize all occupancy to zero. */
+    for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) {
+        for (int i = 0; i < (int)device_ctx.grid.width(); i++) {
+            for (int j = 0; j < (int)device_ctx.grid.height(); j++) {
+                set_usage({i, j, layer_num}, 0);
+                auto tile = device_ctx.grid.get_physical_type({i, j, layer_num});
+
+                for (const auto& sub_tile : tile->sub_tiles) {
+                    auto capacity = sub_tile.capacity;
+
+                    for (int k = 0; k < capacity.total(); k++) {
+                        set_block_at_location({i, j, k + capacity.low, layer_num}, ClusterBlockId::INVALID());
+                    }
+                }
+            }
+        }
+    }
+}
+
+void GridBlock::load_from_block_locs(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs) {
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& device_ctx = g_vpr_ctx.device();
+
+    zero_initialize();
+
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
+        t_pl_loc location = block_locs[blk_id].loc;
+
+        VTR_ASSERT(location.x < (int)device_ctx.grid.width());
+        VTR_ASSERT(location.y < (int)device_ctx.grid.height());
+
+        set_block_at_location(location, blk_id);
+        increment_usage({location.x, location.y, location.layer});
+    }
+}
+
+int GridBlock::increment_usage(const t_physical_tile_loc& loc) {
+    int curr_usage = get_usage(loc);
+    int updated_usage = set_usage(loc, curr_usage + 1);
+
+    return updated_usage;
+}
+
+int GridBlock::decrement_usage(const t_physical_tile_loc& loc) {
+    int curr_usage = get_usage(loc);
+    int updated_usage = set_usage(loc, curr_usage - 1);
+
+    return updated_usage;
+}
diff --git a/vpr/src/base/grid_block.h b/vpr/src/base/grid_block.h
new file mode 100644
index 00000000000..12e934f0af9
--- /dev/null
+++ b/vpr/src/base/grid_block.h
@@ -0,0 +1,92 @@
+#ifndef VTR_GRID_BLOCK_H
+#define VTR_GRID_BLOCK_H
+
+#include "clustered_netlist_fwd.h"
+#include "physical_types.h"
+#include "vpr_types.h"
+
+#include <vector>
+
+///@brief Stores the clustered blocks placed at a particular grid location
+struct t_grid_blocks {
+    int usage; ///<How many valid blocks are in use at this location
+
+    /**
+     * @brief The clustered blocks associated with this grid location.
+     *
+     * Index range: [0..device_ctx.grid[x_loc][y_loc].type->capacity]
+     */
+    std::vector<ClusterBlockId> blocks;
+
+    /**
+     * @brief Test if a subtile at a grid location is occupied by a block.
+     *
+     * Returns true if the subtile corresponding to the passed-in id is not
+     * occupied by a block at this grid location. The subtile id serves
+     * as the z-dimensional offset in the grid indexing.
+     */
+    inline bool subtile_empty(size_t isubtile) const {
+        return blocks[isubtile] == ClusterBlockId::INVALID();
+    }
+};
+
+class GridBlock {
+  public:
+    GridBlock() = default;
+
+    GridBlock(size_t width, size_t height, size_t layers) {
+        grid_blocks_.resize({layers, width, height});
+    }
+
+    inline void initialized_grid_block_at_location(const t_physical_tile_loc& loc, int num_sub_tiles) {
+        grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.resize(num_sub_tiles, ClusterBlockId::INVALID());
+    }
+
+    inline void set_block_at_location(const t_pl_loc& loc, ClusterBlockId blk_id) {
+        grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile] = blk_id;
+    }
+
+    inline ClusterBlockId block_at_location(const t_pl_loc& loc) const {
+        return grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile];
+    }
+
+    inline size_t num_blocks_at_location(const t_physical_tile_loc& loc) const {
+        return grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.size();
+    }
+
+    inline int set_usage(const t_physical_tile_loc loc, int usage) {
+        return grid_blocks_[loc.layer_num][loc.x][loc.y].usage = usage;
+    }
+
+    inline int get_usage(const t_physical_tile_loc loc) const {
+        return grid_blocks_[loc.layer_num][loc.x][loc.y].usage;
+    }
+
+    inline bool is_sub_tile_empty(const t_physical_tile_loc loc, int sub_tile) const {
+        return grid_blocks_[loc.layer_num][loc.x][loc.y].subtile_empty(sub_tile);
+    }
+
+    inline void clear() {
+        grid_blocks_.clear();
+    }
+
+    /**
+     * @brief Initialize usage to 0 and blockID to INVALID for all grid block locations
+     */
+    void zero_initialize();
+
+    /**
+     * @brief Initializes the GridBlock object with the given block_locs.
+     * @param block_locs Stores the location of each clustered block.
+     */
+    void load_from_block_locs(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs);
+
+    int increment_usage(const t_physical_tile_loc& loc);
+
+    int decrement_usage(const t_physical_tile_loc& loc);
+
+  private:
+    vtr::NdMatrix<t_grid_blocks, 3> grid_blocks_;
+};
+
+#endif //VTR_GRID_BLOCK_H
diff --git a/vpr/src/base/load_flat_place.cpp b/vpr/src/base/load_flat_place.cpp
index 01b2978a310..f13b9f35167 100644
--- a/vpr/src/base/load_flat_place.cpp
+++ b/vpr/src/base/load_flat_place.cpp
@@ -9,12 +9,13 @@ static void print_flat_cluster(FILE* fp, ClusterBlockId iblk,
 
 static void print_flat_cluster(FILE* fp, ClusterBlockId iblk,
                                std::vector<AtomBlockId>& atoms) {
+    const auto& atom_ctx = g_vpr_ctx.atom();
+    const auto& block_locs = g_vpr_ctx.placement().block_locs();
 
-    auto& atom_ctx = g_vpr_ctx.atom();
-    t_pl_loc loc = g_vpr_ctx.placement().block_locs[iblk].loc;
+    t_pl_loc loc = block_locs[iblk].loc;
     size_t bnum = size_t(iblk);
 
-    for (auto atom : atoms) {
+    for (AtomBlockId atom : atoms) {
         t_pb_graph_node* atom_pbgn = atom_ctx.lookup.atom_pb(atom)->pb_graph_node;
         fprintf(fp, "%s %d %d %d %d #%zu %s\n", atom_ctx.nlist.block_name(atom).c_str(),
                 loc.x, loc.y, loc.sub_tile,
@@ -26,15 +27,16 @@ static void print_flat_cluster(FILE* fp, ClusterBlockId iblk,
 
 /* prints a flat placement file */
 void print_flat_placement(const char* flat_place_file) {
-
+    const auto& block_locs = g_vpr_ctx.placement().block_locs();
+
     FILE* fp;
 
     ClusterAtomsLookup atoms_lookup;
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    if (!g_vpr_ctx.placement().block_locs.empty()) {
+    if (!block_locs.empty()) {
         fp = fopen(flat_place_file, "w");
-        for (auto iblk : cluster_ctx.clb_nlist.blocks()) {
+        for (ClusterBlockId iblk : cluster_ctx.clb_nlist.blocks()) {
             auto atoms = atoms_lookup.atoms_in_cluster(iblk);
             print_flat_cluster(fp, iblk, atoms);
         }
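GridBlock above is the inverse map (location to block) with a per-tile usage counter, and set_block_location() keeps the slot assignment and the counter in sync. A toy 1-D analogue of that invariant (names are illustrative):

#include <cassert>
#include <cstdio>
#include <vector>

// Toy analogue of GridBlock: each location holds a usage count and a fixed
// number of sub-tile slots; placing a block fills a slot and bumps usage.
struct ToyGrid {
    struct Loc {
        int usage = 0;
        std::vector<int> blocks;
    };
    std::vector<Loc> locs;

    ToyGrid(int n, int capacity) : locs(n) {
        for (Loc& l : locs) l.blocks.assign(capacity, -1); // -1 ~ INVALID()
    }
    void place(int x, int sub_tile, int blk) {
        assert(locs[x].blocks[sub_tile] == -1); // slot must be free
        locs[x].blocks[sub_tile] = blk;
        ++locs[x].usage; // kept in sync, as in BlkLocRegistry::set_block_location()
    }
};

int main() {
    ToyGrid grid(4, 2);
    grid.place(1, 0, 7);
    grid.place(1, 1, 8);
    std::printf("usage at x=1: %d\n", grid.locs[1].usage); // 2
    return 0;
}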
diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index 2701b46cc09..f95ff2b9b00 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -34,6 +34,7 @@
 #include "route_common.h"
 #include "place_macro.h"
 #include "power.h"
+#include "place_util.h"
 
 #include "RoutingDelayCalculator.h"
 #include "timing_info.h"
@@ -348,7 +349,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                              ScreenUpdatePriority::MINOR,
                              is_flat);
 
-        if (success && Fc_clipped == false) {
+        if (success && !Fc_clipped) {
             final = current;
             save_routing(best_routing,
                          route_ctx.clb_opins_used_locally,
@@ -357,8 +358,9 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
 
             if (placer_opts.place_freq == PLACE_ALWAYS) {
                 auto& cluster_ctx = g_vpr_ctx.clustering();
                 // Cluster-based net_list is used for placement
-                print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
-                            filename_opts.PlaceFile.c_str());
+                std::string placement_id = print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
+                                                       filename_opts.PlaceFile.c_str(), g_vpr_ctx.placement().block_locs());
+                g_vpr_ctx.mutable_placement().placement_id = placement_id;
             }
         }
 
@@ -389,7 +391,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                     &warnings,
                     is_flat);
 
-    init_draw_coords(final);
+    init_draw_coords(final, g_vpr_ctx.placement().blk_loc_registry());
 
     /* Allocate and load additional rr_graph information needed only by the router. */
     alloc_and_load_rr_node_route_structs();
@@ -556,26 +558,28 @@ static float comp_width(t_chan* chan, float x, float separation) {
             break;
     }
 
-    return (val);
+    return val;
 }
 
 /**
  * @brief After placement, logical pins for blocks, and nets must be updated to correspond with physical pins of type.
  *
- * This is required by blocks with capacity > 1 (e.g. typically IOs with multiple instaces in each placement
- * gride location). Since they may be swapped around during placement, we need to update which pins the various
+ * This is required by blocks with capacity > 1 (e.g. typically IOs with multiple instances in each placement
+ * grid location). Since they may be swapped around during placement, we need to update which pins the various
  * nets use.
 *
- * This updates both the external inter-block net connecitivity (i.e. the clustered netlist), and the intra-block
+ * This updates both the external inter-block net connectivity (i.e. the clustered netlist), and the intra-block
 * connectivity (since the internal pins used also change).
 *
 * This function should only be called once
 */
void post_place_sync() {
    /* Go through each block */
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
+
     // Cluster-based netlist is used for placement
-    for (auto block_id : cluster_ctx.clb_nlist.blocks()) {
-        place_sync_external_block_connections(block_id);
+    for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
+        blk_loc_registry.place_sync_external_block_connections(block_id);
     }
 }
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 9e737b78d57..bf51e37b059 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -532,9 +532,9 @@ struct ParseFixPins {
     ConvertedValue<e_pad_loc_type> from_str(const std::string& str) {
         ConvertedValue<e_pad_loc_type> conv_value;
         if (str == "free")
-            conv_value.set_value(FREE);
+            conv_value.set_value(e_pad_loc_type::FREE);
         else if (str == "random")
-            conv_value.set_value(RANDOM);
+            conv_value.set_value(e_pad_loc_type::RANDOM);
         else {
             std::stringstream msg;
             msg << "Invalid conversion from '" << str << "' to e_router_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -545,10 +545,10 @@ struct ParseFixPins {
 
     ConvertedValue<std::string> to_str(e_pad_loc_type val) {
         ConvertedValue<std::string> conv_value;
-        if (val == FREE)
+        if (val == e_pad_loc_type::FREE)
             conv_value.set_value("free");
         else {
-            VTR_ASSERT(val == RANDOM);
+            VTR_ASSERT(val == e_pad_loc_type::RANDOM);
             conv_value.set_value("random");
         }
         return conv_value;
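The ParseFixPins change above is a consequence of e_pad_loc_type becoming a scoped enum (see the vpr_types.h hunk later in this patch): the enumerators no longer leak into the surrounding namespace, so every use must be qualified. A small self-contained sketch of the string-to-scoped-enum conversion, using illustrative names rather than VPR's converter types:

#include <cstdio>
#include <optional>
#include <string>

enum class PadLocType { FREE, RANDOM };

// Scoped enums force qualified enumerators at every conversion site,
// which is exactly why the -/+ lines above spell out e_pad_loc_type::.
std::optional<PadLocType> pad_loc_from_str(const std::string& s) {
    if (s == "free") return PadLocType::FREE;
    if (s == "random") return PadLocType::RANDOM;
    return std::nullopt; // caller reports the error, as ParseFixPins does
}

const char* pad_loc_to_str(PadLocType t) {
    return t == PadLocType::FREE ? "free" : "random";
}

int main() {
    auto t = pad_loc_from_str("random");
    if (t) std::printf("%s\n", pad_loc_to_str(*t));
    return 0;
}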
diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index 4e52df3b040..5813a780a26 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -1,9 +1,7 @@
 #include
 #include
 #include
-#include
 
-#include "vtr_assert.h"
 #include "vtr_util.h"
 #include "vtr_log.h"
 #include "vtr_digest.h"
@@ -17,23 +15,22 @@
 #include "read_xml_arch_file.h"
 #include "place_util.h"
 
-void read_place_header(
-    std::ifstream& placement_file,
-    const char* net_file,
-    const char* place_file,
-    bool verify_file_hashes,
-    const DeviceGrid& grid);
-
-void read_place_body(
-    std::ifstream& placement_file,
-    const char* place_file,
-    bool is_place_file);
-
-void read_place(
-    const char* net_file,
-    const char* place_file,
-    bool verify_file_digests,
-    const DeviceGrid& grid) {
+static void read_place_header(std::ifstream& placement_file,
+                              const char* net_file,
+                              const char* place_file,
+                              bool verify_file_digests,
+                              const DeviceGrid& grid);
+
+static std::string read_place_body(std::ifstream& placement_file,
+                                   BlkLocRegistry& blk_loc_registry,
+                                   const char* place_file,
+                                   bool is_place_file);
+
+std::string read_place(const char* net_file,
+                       const char* place_file,
+                       BlkLocRegistry& blk_loc_registry,
+                       bool verify_file_digests,
+                       const DeviceGrid& grid) {
     std::ifstream fstream(place_file);
     if (!fstream) {
         VPR_FATAL_ERROR(VPR_ERROR_PLACE_F,
@@ -47,13 +44,16 @@
     VTR_LOG("\n");
 
     read_place_header(fstream, net_file, place_file, verify_file_digests, grid);
-    read_place_body(fstream, place_file, is_place_file);
+    std::string placement_id = read_place_body(fstream, blk_loc_registry, place_file, is_place_file);
 
     VTR_LOG("Successfully read %s.\n", place_file);
     VTR_LOG("\n");
+
+    return placement_id;
 }
 
-void read_constraints(const char* constraints_file) {
+void read_constraints(const char* constraints_file,
+                      BlkLocRegistry& blk_loc_registry) {
     std::ifstream fstream(constraints_file);
     if (!fstream) {
         VPR_FATAL_ERROR(VPR_ERROR_PLACE_F,
@@ -66,7 +66,7 @@
     VTR_LOG("Reading %s.\n", constraints_file);
     VTR_LOG("\n");
 
-    read_place_body(fstream, constraints_file, is_place_file);
+    read_place_body(fstream, blk_loc_registry, constraints_file, is_place_file);
 
     VTR_LOG("Successfully read constraints file %s.\n", constraints_file);
     VTR_LOG("\n");
@@ -80,11 +80,11 @@
  * The verify_file_digests bool is used to decide whether to give a warning or an error if the netlist files do not match.
 * An error is given if the grid size has changed.
 */
-void read_place_header(std::ifstream& placement_file,
-                       const char* net_file,
-                       const char* place_file,
-                       bool verify_file_digests,
-                       const DeviceGrid& grid) {
+static void read_place_header(std::ifstream& placement_file,
+                              const char* net_file,
+                              const char* place_file,
+                              bool verify_file_digests,
+                              const DeviceGrid& grid) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
     std::string line;
@@ -205,12 +205,13 @@
  * The bool is_place_file indicates if the file should be read as a place file (is_place_file = true)
 * or a constraints file (is_place_file = false).
 */
-void read_place_body(std::ifstream& placement_file,
-                     const char* place_file,
-                     bool is_place_file) {
+static std::string read_place_body(std::ifstream& placement_file,
+                                   BlkLocRegistry& blk_loc_registry,
+                                   const char* place_file,
+                                   bool is_place_file) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
     auto& atom_ctx = g_vpr_ctx.atom();
+    auto& block_locs = blk_loc_registry.mutable_block_locs();
 
     std::string line;
     int lineno = 0;
@@ -219,7 +220,7 @@
     vtr::vector_map<ClusterBlockId, int> seen_blocks;
 
     //initialize seen_blocks
-    for (auto block_id : cluster_ctx.clb_nlist.blocks()) {
+    for (ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
         int seen_count = 0;
         seen_blocks.insert(block_id, seen_count);
     }
@@ -286,10 +287,10 @@
 
             //Check if block is listed multiple times with conflicting locations in constraints file
             if (seen_blocks[blk_id] > 0) {
-                if (block_x != place_ctx.block_locs[blk_id].loc.x ||
-                    block_y != place_ctx.block_locs[blk_id].loc.y ||
-                    sub_tile_index != place_ctx.block_locs[blk_id].loc.sub_tile ||
-                    block_layer != place_ctx.block_locs[blk_id].loc.layer) {
+                if (block_x != block_locs[blk_id].loc.x ||
+                    block_y != block_locs[blk_id].loc.y ||
+                    sub_tile_index != block_locs[blk_id].loc.sub_tile ||
+                    block_layer != block_locs[blk_id].loc.layer) {
                     std::string cluster_name = cluster_ctx.clb_nlist.block_name(blk_id);
                     VPR_THROW(VPR_ERROR_PLACE,
                               "The location of cluster %s (#%d) is specified %d times in the constraints file with conflicting locations. \n"
@@ -305,20 +306,20 @@
             loc.layer = block_layer;
 
             if (seen_blocks[blk_id] == 0) {
-                if (is_place_file && place_ctx.block_locs[blk_id].is_fixed) {
-                    const auto& contraint_loc = place_ctx.block_locs[blk_id].loc;
-                    if (loc != contraint_loc) {
+                if (is_place_file && block_locs[blk_id].is_fixed) {
+                    const t_pl_loc& constraint_loc = block_locs[blk_id].loc;
+                    if (loc != constraint_loc) {
                         VPR_THROW(VPR_ERROR_PLACE,
                                   "The new location assigned to cluster #%d is (%d,%d,%d,%d), which is inconsistent with the location specified in the constraint file (%d,%d,%d,%d).",
-                                  blk_id, loc.x, loc.y, loc.layer, loc.sub_tile, contraint_loc.x, contraint_loc.y, contraint_loc.layer, contraint_loc.sub_tile);
+                                  blk_id, loc.x, loc.y, loc.layer, loc.sub_tile, constraint_loc.x, constraint_loc.y, constraint_loc.layer, constraint_loc.sub_tile);
                     }
                 }
-                set_block_location(blk_id, loc);
+                blk_loc_registry.set_block_location(blk_id, loc);
             }
 
             //need to lock down blocks if it is a constraints file
             if (!is_place_file) {
-                place_ctx.block_locs[blk_id].is_fixed = true;
+                block_locs[blk_id].is_fixed = true;
             }
 
             //mark the block as seen
@@ -335,7 +336,7 @@
     //For place files, check that all blocks have been read
     //For constraints files, not all blocks need to be read
     if (is_place_file) {
-        for (auto block_id : cluster_ctx.clb_nlist.blocks()) {
+        for (ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) {
             if (seen_blocks[block_id] == 0) {
                 VPR_THROW(VPR_ERROR_PLACE, "Block %d has not been read from the place file. \n", block_id);
             }
@@ -344,7 +345,9 @@
 
     //Want to make a hash for place file to be used during routing for error checking
     if (is_place_file) {
-        place_ctx.placement_id = vtr::secure_digest_file(place_file);
+        return vtr::secure_digest_file(place_file);
+    } else {
+        return {};
     }
 }
 
@@ -354,15 +357,15 @@
  * The architecture and netlist files used to generate this placement are recorded
 * in the file to avoid loading a placement with the wrong support file later.
 */
-void print_place(const char* net_file,
-                 const char* net_id,
-                 const char* place_file,
-                 bool is_place_file) {
+std::string print_place(const char* net_file,
+                        const char* net_id,
+                        const char* place_file,
+                        const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
+                        bool is_place_file) {
     FILE* fp;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
 
     fp = fopen(place_file, "w");
 
@@ -375,10 +378,10 @@
         fprintf(fp, "#----------\t--\t--\t------\t-----\t------------\n");
     }
 
-    if (!place_ctx.block_locs.empty()) { //Only if placement exists
-        for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
+    if (!block_locs.empty()) { //Only if placement exists
+        for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
             // if block is not placed, skip (useful for printing legalizer output)
-            if (!is_place_file && (place_ctx.block_locs[blk_id].loc.x == INVALID_X)) {
+            if (!is_place_file && (block_locs[blk_id].loc.x == INVALID_X)) {
                 continue;
             }
             fprintf(fp, "%s\t", cluster_ctx.clb_nlist.block_pb(blk_id)->name);
@@ -386,15 +389,15 @@
             fprintf(fp, "\t");
 
             fprintf(fp, "%d\t%d\t%d\t%d",
-                    place_ctx.block_locs[blk_id].loc.x,
-                    place_ctx.block_locs[blk_id].loc.y,
-                    place_ctx.block_locs[blk_id].loc.sub_tile,
-                    place_ctx.block_locs[blk_id].loc.layer);
+                    block_locs[blk_id].loc.x,
+                    block_locs[blk_id].loc.y,
+                    block_locs[blk_id].loc.sub_tile,
+                    block_locs[blk_id].loc.layer);
             fprintf(fp, "\t#%zu\n", size_t(blk_id));
         }
     }
     fclose(fp);
 
     //Calculate the ID of the placement
-    place_ctx.placement_id = vtr::secure_digest_file(place_file);
+    return vtr::secure_digest_file(place_file);
 }
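A recurring move in this file: read_place_body() and print_place() now return the placement digest instead of stamping place_ctx.placement_id as a side effect, so the caller decides where the id lives. A minimal sketch of that return-value-instead-of-global-write refactor; write_and_digest and Ctx are stand-ins, not VPR functions:

#include <cstdio>
#include <functional>
#include <string>

// Stand-in for vtr::secure_digest_file(): a real implementation would hash
// the written file's contents.
std::string write_and_digest(const std::string& contents) {
    return std::to_string(std::hash<std::string>{}(contents));
}

struct Ctx {
    std::string placement_id;
};

int main() {
    Ctx ctx;
    // Caller owns the store; the writer has no hidden global side effect.
    ctx.placement_id = write_and_digest("block_a 0 1 0 0");
    std::printf("placement_id = %s\n", ctx.placement_id.c_str());
    return 0;
}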
diff --git a/vpr/src/base/read_place.h b/vpr/src/base/read_place.h
index 4dfd9aef82e..088f17dd546 100644
--- a/vpr/src/base/read_place.h
+++ b/vpr/src/base/read_place.h
@@ -1,21 +1,31 @@
 #ifndef READ_PLACE_H
 #define READ_PLACE_H
 
+#include "vtr_vector_map.h"
+
+#include <string>
+
+class BlkLocRegistry;
+class PlacerState;
+class ClusterBlockId;
+struct t_block_loc;
+
 /**
  * This function is for reading a place file when placement is skipped.
 * It takes in the current netlist file and grid dimensions to check that they match those that were used when placement was generated.
 * The verify_file_hashes bool is used to decide whether to give a warning or an error if the netlist files do not match.
 */
-void read_place(
-    const char* net_file,
-    const char* place_file,
-    bool verify_file_hashes,
-    const DeviceGrid& grid);
+std::string read_place(const char* net_file,
+                       const char* place_file,
+                       BlkLocRegistry& blk_loc_registry,
+                       bool verify_file_hashes,
+                       const DeviceGrid& grid);
 
 /**
  * This function is used to read a constraints file that specifies the desired locations of blocks.
 */
-void read_constraints(const char* constraints_file);
+void read_constraints(const char* constraints_file,
+                      BlkLocRegistry& blk_loc_registry);
 
 /**
  * This function prints out a place file.
@@ -27,9 +37,10 @@ void read_constraints(const char* constraints_file);
 * will not be included; this file is used as a placement constraints
 * file when running placement in order to place orphan clusters.
 */
-void print_place(const char* net_file,
-                 const char* net_id,
-                 const char* place_file,
-                 bool is_place_file = true);
+std::string print_place(const char* net_file,
+                        const char* net_id,
+                        const char* place_file,
+                        const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs,
+                        bool is_place_file = true);
 
 #endif
diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp
index 777ca226710..998e0c5434f 100644
--- a/vpr/src/base/read_route.cpp
+++ b/vpr/src/base/read_route.cpp
@@ -222,7 +222,7 @@ static void process_nodes(const Netlist<>& net_list, std::ifstream& fp, ClusterN
     auto& device_ctx = g_vpr_ctx.mutable_device();
     const auto& rr_graph = device_ctx.rr_graph;
     auto& route_ctx = g_vpr_ctx.mutable_routing();
-    auto& place_ctx = g_vpr_ctx.placement();
+    const auto& grid_blocks = g_vpr_ctx.placement().grid_blocks();
 
     t_trace* head_ptr = nullptr;
     t_trace* tptr = nullptr;
@@ -323,7 +323,9 @@ static void process_nodes(const Netlist<>& net_list, std::ifstream& fp, ClusterN
                 ptc = atoi(tokens[5 + offset].c_str());
                 if (rr_graph.node_ptc_num(RRNodeId(inode)) != ptc) {
                     vpr_throw(VPR_ERROR_ROUTE, filename, lineno,
-                              "The ptc num of node %d does not match the rr graph, Running without flat routing; if this file was created with flat routing, re-run vpr with the --flat_routing option", inode);
+                              "The ptc num of node %d does not match the rr graph, Running without flat routing; "
+                              "if this file was created with flat routing, re-run vpr with the --flat_routing option",
+                              inode);
                 }
 
                 /*Process switches and pb pin info if it is ipin or opin type*/
@@ -335,12 +337,10 @@ static void process_nodes(const Netlist<>& net_list, std::ifstream& fp, ClusterN
                     int width_offset = device_ctx.grid.get_width_offset({x, y, layer_num});
                     int height_offset = device_ctx.grid.get_height_offset({x, y, layer_num});
                     auto physical_tile = device_ctx.grid.get_physical_type({x, y, layer_num});
-                    const t_sub_tile* sub_tile;
-                    int sub_tile_rel_cap;
-                    std::tie(sub_tile, sub_tile_rel_cap) = get_sub_tile_from_pin_physical_num(physical_tile, pin_num);
+                    auto [sub_tile, sub_tile_rel_cap] = get_sub_tile_from_pin_physical_num(physical_tile, pin_num);
                     int sub_tile_offset = sub_tile->capacity.low + sub_tile_rel_cap;
 
-                    ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({x - width_offset, y - height_offset, sub_tile_offset, layer_num});
+                    ClusterBlockId iblock = grid_blocks.block_at_location({x - width_offset, y - height_offset, sub_tile_offset, layer_num});
                     VTR_ASSERT(iblock);
 
                     const t_pb_graph_pin* pb_pin;
@@ -569,7 +569,7 @@ static bool check_rr_graph_connectivity(RRNodeId prev_node, RRNodeId node) {
 void print_route(const Netlist<>& net_list,
                  FILE* fp,
                  bool is_flat) {
-    auto& place_ctx = g_vpr_ctx.placement();
+    const auto& grid_blocks = g_vpr_ctx.placement().grid_blocks();
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
     auto& route_ctx = g_vpr_ctx.mutable_routing();
@@ -642,13 +642,11 @@ void print_route(const Netlist<>& net_list,
                         int pin_num = rr_graph.node_pin_num(inode);
                         int xoffset = device_ctx.grid.get_width_offset({ilow, jlow, layer_num});
                         int yoffset = device_ctx.grid.get_height_offset({ilow, jlow, layer_num});
-                        const t_sub_tile* sub_tile;
-                        int sub_tile_rel_cap;
-                        std::tie(sub_tile, sub_tile_rel_cap) = get_sub_tile_from_pin_physical_num(physical_tile, pin_num);
+                        auto [sub_tile, sub_tile_rel_cap] = get_sub_tile_from_pin_physical_num(physical_tile, pin_num);
                         int sub_tile_offset = sub_tile->capacity.low + sub_tile_rel_cap;
 
-                        ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({ilow - xoffset, jlow - yoffset,
-                                                                                         sub_tile_offset, layer_num});
+                        ClusterBlockId iblock = grid_blocks.block_at_location({ilow - xoffset, jlow - yoffset,
+                                                                               sub_tile_offset, layer_num});
                         VTR_ASSERT(iblock);
                         const t_pb_graph_pin* pb_pin;
                         if (is_pin_on_tile(physical_tile, pin_num)) {
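Both read_route.cpp hunks replace the declare-then-std::tie idiom with C++17 structured bindings. A tiny sketch of the two styles side by side, with an illustrative stand-in for the tuple-returning call:

#include <cstdio>
#include <tuple>
#include <utility>

// Stand-in for a call like get_sub_tile_from_pin_physical_num().
std::pair<const char*, int> get_sub_tile_info() {
    return {"io", 3};
}

int main() {
    // Pre-C++17 style: declare the outputs first, then std::tie them.
    const char* name_old;
    int cap_old;
    std::tie(name_old, cap_old) = get_sub_tile_info();

    // C++17 structured bindings: declaration and unpacking in one step,
    // as in the rewritten call sites above.
    auto [name, cap] = get_sub_tile_info();
    std::printf("%s %d (%s %d)\n", name, cap, name_old, cap_old);
    return 0;
}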
diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp
index bc09e68418d..bda53d16a56 100644
--- a/vpr/src/base/stats.cpp
+++ b/vpr/src/base/stats.cpp
@@ -1,12 +1,9 @@
-#include
-#include
 #include
 
 #include "route_tree.h"
 #include "vtr_assert.h"
 #include "vtr_log.h"
-#include "vtr_math.h"
 #include "vtr_ndmatrix.h"
 
 #include "vpr_types.h"
@@ -55,11 +52,10 @@ void routing_stats(const Netlist<>& net_list,
                    enum e_directionality directionality,
                    int wire_to_ipin_switch,
                    bool is_flat) {
-    float area, used_area;
-
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& block_locs = g_vpr_ctx.placement().block_locs();
 
     int num_rr_switch = rr_graph.num_rr_switches();
 
@@ -69,7 +65,7 @@ void routing_stats(const Netlist<>& net_list,
 
     VTR_LOG("Logic area (in minimum width transistor areas, excludes I/Os and empty grid tiles)...\n");
 
-    area = 0;
+    float area = 0;
     for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
         for (int i = 0; i < (int)device_ctx.grid.width(); i++) {
             for (int j = 0; j < (int)device_ctx.grid.height(); j++) {
@@ -92,9 +88,10 @@ void routing_stats(const Netlist<>& net_list,
     /* Todo: need to add pitch of routing to blocks with height > 3 */
     VTR_LOG("\tTotal logic block area (Warning, need to add pitch of routing to blocks with height > 3): %g\n", area);
 
-    used_area = 0;
-    for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
-        auto type = physical_tile_type(blk_id);
+    float used_area = 0;
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
+        t_pl_loc block_loc = block_locs[blk_id].loc;
+        auto type = physical_tile_type(block_loc);
         if (!is_io_type(type)) {
             if (type->area == UNDEFINED) {
                 used_area += grid_logic_tile_area * type->width * type->height;
@@ -111,8 +108,9 @@ void routing_stats(const Netlist<>& net_list,
         get_segment_usage_stats(segment_inf);
     }
 
-    if (full_stats == true)
+    if (full_stats) {
         print_wirelen_prob_dist(is_flat);
+    }
 }
 
 /**
@@ -120,26 +118,20 @@ void routing_stats(const Netlist<>& net_list,
 * and net length in the routing.
 */
 void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) {
-    int bends, total_bends, max_bends;
-    int length, total_length, max_length;
-    int segments, total_segments, max_segments;
-    float av_bends, av_length, av_segments;
-    int num_global_nets, num_clb_opins_reserved, num_absorbed_nets;
-
-    bool is_absorbed;
-
-    max_bends = 0;
-    total_bends = 0;
-    max_length = 0;
-    total_length = 0;
-    max_segments = 0;
-    total_segments = 0;
-    num_global_nets = 0;
-    num_clb_opins_reserved = 0;
-    num_absorbed_nets = 0;
+    int max_bends = 0;
+    int total_bends = 0;
+    int max_length = 0;
+    int total_length = 0;
+    int max_segments = 0;
+    int total_segments = 0;
+    int num_global_nets = 0;
+    int num_clb_opins_reserved = 0;
+    int num_absorbed_nets = 0;
 
     for (auto net_id : net_list.nets()) {
         if (!net_list.net_is_ignored(net_id) && net_list.net_sinks(net_id).size() != 0) { /* Globals don't count. */
+            int bends, length, segments;
+            bool is_absorbed;
             get_num_bends_and_length(net_id, &bends, &length, &segments, &is_absorbed);
 
             total_bends += bends;
@@ -162,12 +154,12 @@ void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) {
         }
     }
 
-    av_bends = (float)total_bends / (float)((int)net_list.nets().size() - num_global_nets);
+    float av_bends = (float)total_bends / (float)((int)net_list.nets().size() - num_global_nets);
     VTR_LOG("\n");
     VTR_LOG("Average number of bends per net: %#g Maximum # of bends: %d\n", av_bends, max_bends);
     VTR_LOG("\n");
 
-    av_length = (float)total_length / (float)((int)net_list.nets().size() - num_global_nets);
+    float av_length = (float)total_length / (float)((int)net_list.nets().size() - num_global_nets);
     VTR_LOG("Number of global nets: %d\n", num_global_nets);
     VTR_LOG("Number of routed nets (nonglobal): %d\n", (int)net_list.nets().size() - num_global_nets);
     VTR_LOG("Wire length results (in units of 1 clb segments)...\n");
@@ -175,7 +167,7 @@ void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) {
     VTR_LOG("\tMaximum net length: %d\n", max_length);
     VTR_LOG("\n");
 
-    av_segments = (float)total_segments / (float)((int)net_list.nets().size() - num_global_nets);
+    float av_segments = (float)total_segments / (float)((int)net_list.nets().size() - num_global_nets);
     VTR_LOG("Wire length results in terms of physical segments...\n");
     VTR_LOG("\tTotal wiring segments used: %d, average wire segments per net: %#g\n", total_segments, av_segments);
     VTR_LOG("\tMaximum segments used by a net: %d\n", max_segments);
@@ -422,17 +414,17 @@ void print_wirelen_prob_dist(bool is_flat) {
 * (i.e. the clock when it is marked global).
 */
 void print_lambda() {
-    int ipin;
     int num_inputs_used = 0;
-    float lambda;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& block_locs = g_vpr_ctx.placement().block_locs();
 
-    for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
-        auto type = physical_tile_type(blk_id);
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
+        t_pl_loc block_loc = block_locs[blk_id].loc;
+        auto type = physical_tile_type(block_loc);
         VTR_ASSERT(type != nullptr);
         if (!is_io_type(type)) {
-            for (ipin = 0; ipin < type->num_pins; ipin++) {
+            for (int ipin = 0; ipin < type->num_pins; ipin++) {
                 if (get_pin_type_from_pin_physical_num(type, ipin) == RECEIVER) {
                     ClusterNetId net_id = cluster_ctx.clb_nlist.block_net(blk_id, ipin);
                     if (net_id != ClusterNetId::INVALID()) /* Pin is connected? */
@@ -443,7 +435,7 @@ void print_lambda() {
         }
     }
 
-    lambda = (float)num_inputs_used / (float)cluster_ctx.clb_nlist.blocks().size();
+    float lambda = (float)num_inputs_used / (float)cluster_ctx.clb_nlist.blocks().size();
     VTR_LOG("Average lambda (input pins used per clb) is: %g\n", lambda);
 }
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index fac75c5fbb3..2bc4dd2a5f9 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -789,7 +789,7 @@ bool vpr_place_flow(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_a
 void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch& arch) {
     bool is_flat = false;
-    if (placer_needs_lookahead(vpr_setup)) {
+    if (vpr_setup.PlacerOpts.place_algorithm.is_timing_driven()) {
         // Prime lookahead cache to avoid adding lookahead computation cost to
         // the placer timer.
         // Flat_routing is disabled in placement
@@ -817,10 +817,13 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch&
 
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& block_locs = g_vpr_ctx.placement().block_locs();
+    auto& placement_id = g_vpr_ctx.mutable_placement().placement_id;
 
-    print_place(filename_opts.NetFile.c_str(),
-                cluster_ctx.clb_nlist.netlist_id().c_str(),
-                filename_opts.PlaceFile.c_str());
+    placement_id = print_place(filename_opts.NetFile.c_str(),
+                               cluster_ctx.clb_nlist.netlist_id().c_str(),
+                               filename_opts.PlaceFile.c_str(),
+                               block_locs);
 }
 
 void vpr_load_placement(t_vpr_setup& vpr_setup, const t_arch& arch) {
@@ -831,10 +834,14 @@ void vpr_load_placement(t_vpr_setup& vpr_setup, const t_arch& arch) {
     const auto& filename_opts = vpr_setup.FileNameOpts;
 
     //Initialize placement data structures, which will be filled when loading placement
-    init_placement_context();
+    auto& block_locs = place_ctx.mutable_block_locs();
+    GridBlock& grid_blocks = place_ctx.mutable_grid_blocks();
+    init_placement_context(block_locs, grid_blocks);
 
     //Load an existing placement from a file
-    read_place(filename_opts.NetFile.c_str(), filename_opts.PlaceFile.c_str(), filename_opts.verify_file_digests, device_ctx.grid);
+    place_ctx.placement_id = read_place(filename_opts.NetFile.c_str(), filename_opts.PlaceFile.c_str(),
+                                        place_ctx.mutable_blk_loc_registry(),
+                                        filename_opts.verify_file_digests, device_ctx.grid);
 
     //Ensure placement macros are loaded so that they can be drawn after placement (e.g. during routing)
     place_ctx.pl_macros = alloc_and_load_placement_macros(arch.Directs, arch.num_directs);
@@ -858,7 +865,7 @@ RouteStatus vpr_route_flow(const Netlist<>& net_list,
         route_status = RouteStatus(true, -1);
     } else { //Do or load
-        // set the net_is_ignored flag for nets that that have route_model set to ideal in route constraints
+        // set the net_is_ignored flag for nets that have route_model set to ideal in route constraints
         apply_route_constraints(g_vpr_ctx.routing().constraints);
 
         int chan_width = router_opts.fixed_channel_width;
@@ -1072,7 +1079,7 @@ RouteStatus vpr_load_routing(t_vpr_setup& vpr_setup,
                                     net_delay);
         timing_info->update();
     }
-    init_draw_coords(fixed_channel_width);
+    init_draw_coords(fixed_channel_width, g_vpr_ctx.placement().blk_loc_registry());
 
     return RouteStatus(is_legal, fixed_channel_width);
 }
@@ -1111,7 +1118,7 @@ void vpr_create_rr_graph(t_vpr_setup& vpr_setup, const t_arch& arch, int chan_wi
                             &warnings,
                             is_flat);
 
     //Initialize drawing, now that we have an RR graph
-    init_draw_coords(chan_width_fac);
+    init_draw_coords(chan_width_fac, g_vpr_ctx.placement().blk_loc_registry());
 }
 
 void vpr_init_graphics(const t_vpr_setup& vpr_setup, const t_arch& arch, bool is_flat) {
@@ -1277,8 +1284,8 @@ static void free_atoms() {
 static void free_placement() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
-    place_ctx.block_locs.clear();
-    place_ctx.grid_blocks.clear();
+    place_ctx.mutable_block_locs().clear();
+    place_ctx.mutable_grid_blocks().clear();
 }
 
 static void free_routing() {
@@ -1472,6 +1479,7 @@ void vpr_analysis(const Netlist<>& net_list,
                   bool is_flat) {
     auto& route_ctx = g_vpr_ctx.routing();
     auto& atom_ctx = g_vpr_ctx.atom();
+    const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry();
 
     if (route_ctx.route_trees.empty()) {
         VPR_FATAL_ERROR(VPR_ERROR_ANALYSIS, "No routing loaded -- can not perform post-routing analysis");
@@ -1492,8 +1500,7 @@ void vpr_analysis(const Netlist<>& net_list,
 
     //Load the net delays
     NetPinsMatrix<float> net_delay = make_net_pins_matrix<float>(net_list);
-    load_net_delay_from_routing(net_list,
-                                net_delay);
+    load_net_delay_from_routing(net_list, net_delay);
 
     //Do final timing analysis
     auto analysis_delay_calc = std::make_shared<AnalysisDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay, vpr_setup.RouterOpts.flat_routing);
@@ -1508,10 +1515,10 @@ void vpr_analysis(const Netlist<>& net_list,
 
     //Timing stats
     VTR_LOG("\n");
-    generate_hold_timing_stats(/*prefix=*/"", *timing_info,
-                               *analysis_delay_calc, vpr_setup.AnalysisOpts, vpr_setup.RouterOpts.flat_routing);
-    generate_setup_timing_stats(/*prefix=*/"", *timing_info,
-                                *analysis_delay_calc, vpr_setup.AnalysisOpts, vpr_setup.RouterOpts.flat_routing);
+    generate_hold_timing_stats(/*prefix=*/"", *timing_info, *analysis_delay_calc,
+                               vpr_setup.AnalysisOpts, vpr_setup.RouterOpts.flat_routing, blk_loc_registry);
+    generate_setup_timing_stats(/*prefix=*/"", *timing_info, *analysis_delay_calc,
+                                vpr_setup.AnalysisOpts, vpr_setup.RouterOpts.flat_routing, blk_loc_registry);
 
     //Write the post-synthesis netlist
     if (vpr_setup.AnalysisOpts.gen_post_synthesis_netlist) {
diff --git a/vpr/src/base/vpr_constraints_writer.cpp b/vpr/src/base/vpr_constraints_writer.cpp
index 62cb2f666d2..0de68549aec 100644
--- a/vpr/src/base/vpr_constraints_writer.cpp
+++ b/vpr/src/base/vpr_constraints_writer.cpp
@@ -54,7 +54,7 @@ void write_vpr_floorplan_constraints(const char* file_name, int expand, bool sub
 
 void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
+    auto& block_locs = g_vpr_ctx.placement().block_locs();
 
     int part_id = 0;
     /*
@@ -69,7 +69,7 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex
         Partition part;
         part.set_name(part_name);
 
-        const auto& loc = place_ctx.block_locs[blk_id].loc;
+        const auto& loc = block_locs[blk_id].loc;
 
         PartitionRegion pr;
         Region reg(loc.x - expand, loc.y - expand,
@@ -94,7 +94,7 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex
 void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
+    auto& block_locs = g_vpr_ctx.placement().block_locs();
     auto& device_ctx = g_vpr_ctx.device();
 
     const int n_layers = device_ctx.grid.get_num_layers();
@@ -159,8 +159,8 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int
     */
     for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
         const std::unordered_set<AtomBlockId>& atoms = cluster_to_atoms(blk_id);
-        int x = place_ctx.block_locs[blk_id].loc.x;
-        int y = place_ctx.block_locs[blk_id].loc.y;
+        int x = block_locs[blk_id].loc.x;
+        int y = block_locs[blk_id].loc.y;
         int width = device_ctx.grid.width();
         int height = device_ctx.grid.height();
         VTR_ASSERT(x >= 0 && x < width);
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 376a5c6e01e..5dde5f352a6 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -33,6 +33,7 @@
 #include "noc_traffic_flows.h"
 #include "noc_routing.h"
 #include "tatum/report/TimingPath.hpp"
+#include "blk_loc_registry.h"
 
 #ifndef NO_SERVER
 
@@ -383,14 +384,48 @@ struct PackingMultithreadingContext : public Context {
 * or related placer algorithm state.
 */
 struct PlacementContext : public Context {
-    ///@brief Clustered block placement locations
-    vtr::vector_map<ClusterBlockId, t_block_loc> block_locs;
+  private:
+    /**
+     * Determines if blk_loc_registry_ can be accessed by calling getter methods.
+     * This flag should be set to false at the beginning of the placement stage,
+     * and set to true at the end of placement. This ensures that variables that
+     * are subject to change during placement are kept local to the placement stage.
+     */
+    bool loc_vars_are_accessible_ = true;
+
+    /**
+     * @brief Stores block location information, which is subject to change during the
+     * placement stage.
+     */
+    BlkLocRegistry blk_loc_registry_;
 
-    ///@brief Clustered pin placement mapping with physical pin
-    vtr::vector_map<ClusterPinId, int> physical_pins;
+  public:
+    const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs() const { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.block_locs(); }
+    vtr::vector_map<ClusterBlockId, t_block_loc>& mutable_block_locs() { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.mutable_block_locs(); }
+    const GridBlock& grid_blocks() const { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.grid_blocks(); }
+    GridBlock& mutable_grid_blocks() { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.mutable_grid_blocks(); }
+    vtr::vector_map<ClusterPinId, int>& mutable_physical_pins() { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.mutable_physical_pins(); }
+    const vtr::vector_map<ClusterPinId, int>& physical_pins() const { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_.physical_pins(); }
+    BlkLocRegistry& mutable_blk_loc_registry() { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_; }
+    const BlkLocRegistry& blk_loc_registry() const { VTR_ASSERT_SAFE(loc_vars_are_accessible_); return blk_loc_registry_; }
 
-    ///@brief Clustered block associated with each grid location (i.e. inverse of block_locs)
-    GridBlock grid_blocks;
+    /**
+     * @brief Makes blk_loc_registry_ inaccessible through the getter methods.
+     *
+     * This method should be called at the beginning of the placement stage to
+     * guarantee that the placement stage code does not access block location variables
+     * stored in the global state.
+     */
+    void lock_loc_vars() { VTR_ASSERT_SAFE(loc_vars_are_accessible_); loc_vars_are_accessible_ = false; }
+
+    /**
+     * @brief Makes blk_loc_registry_ accessible through the getter methods.
+     *
+     * This method should be called at the end of the placement stage to
+     * make the block location information accessible for subsequent stages.
+     */
+    void unlock_loc_vars() { VTR_ASSERT_SAFE(!loc_vars_are_accessible_); loc_vars_are_accessible_ = true; }
 
     ///@brief The pl_macros array stores all the placement macros (usually carry chains).
     std::vector<t_pl_macro> pl_macros;
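The lock_loc_vars()/unlock_loc_vars() pair above gates every getter behind a flag so code running during placement cannot reach the stale global copy. A minimal self-contained sketch of that assert-gated accessor idea (PlacementState and locs() are illustrative names):

#include <cassert>
#include <cstdio>
#include <vector>

// Sketch of the PlacementContext locking idea: a flag gates every getter so
// code that runs *during* placement cannot read the global location state.
class PlacementState {
  public:
    void lock() { assert(accessible_); accessible_ = false; }
    void unlock() { assert(!accessible_); accessible_ = true; }

    const std::vector<int>& locs() const {
        assert(accessible_ && "location variables are locked during placement");
        return locs_;
    }

  private:
    bool accessible_ = true;
    std::vector<int> locs_{1, 2, 3};
};

int main() {
    PlacementState st;
    st.lock();   // placement starts: the global copy is off limits
    // st.locs(); // would assert here
    st.unlock(); // placement done: readers may use it again
    std::printf("%zu\n", st.locs().size());
    return 0;
}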
- */ - inline bool subtile_empty(size_t isubtile) const { - return blocks[isubtile] == EMPTY_BLOCK_ID; - } -}; - -class GridBlock { - public: - GridBlock() = default; - - GridBlock(size_t width, size_t height, size_t layers) { - grid_blocks_.resize({layers, width, height}); - } - - inline void initialized_grid_block_at_location(const t_physical_tile_loc& loc, int num_sub_tiles) { - grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.resize(num_sub_tiles, EMPTY_BLOCK_ID); - } - - inline void set_block_at_location(const t_pl_loc& loc, ClusterBlockId blk_id) { - grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile] = blk_id; - } - - inline ClusterBlockId block_at_location(const t_pl_loc& loc) const { - return grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile]; - } - - inline size_t num_blocks_at_location(const t_physical_tile_loc& loc) const { - return grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.size(); - } - - inline int set_usage(const t_physical_tile_loc loc, int usage) { - return grid_blocks_[loc.layer_num][loc.x][loc.y].usage = usage; - } - - inline int get_usage(const t_physical_tile_loc loc) const { - return grid_blocks_[loc.layer_num][loc.x][loc.y].usage; - } - - inline bool is_sub_tile_empty(const t_physical_tile_loc loc, int sub_tile) const { - return grid_blocks_[loc.layer_num][loc.x][loc.y].subtile_empty(sub_tile); - } - - inline void clear() { - grid_blocks_.clear(); - } - - private: - vtr::NdMatrix grid_blocks_; -}; - ///@brief Names of various files struct t_file_name_opts { std::string ArchFile; @@ -1104,7 +1032,7 @@ class t_place_algorithm { e_place_algorithm algo = e_place_algorithm::CRITICALITY_TIMING_PLACE; }; -enum e_pad_loc_type { +enum class e_pad_loc_type { FREE, RANDOM }; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index ec4f764868f..d87375dc8c3 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -570,16 +570,18 @@ void free_draw_structs() { #endif /* NO_GRAPHICS */ } -void init_draw_coords(float width_val) { +void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry) { #ifndef NO_GRAPHICS - /* Load the arrays containing the left and bottom coordinates of the clbs * - * forming the FPGA. tile_width_val sets the width and height of a drawn * - * clb. */ t_draw_state* draw_state = get_draw_state_vars(); t_draw_coords* draw_coords = get_draw_coords_vars(); - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + /* Store a reference to block location variables so that other drawing + * functions can access block location information without accessing + * the global placement state, which is inaccessible during placement.*/ + set_graphics_blk_loc_registry_ref(blk_loc_registry); + if (!draw_state->show_graphics && !draw_state->save_graphics && draw_state->graphics_commands.empty()) return; //do not initialize only if --disp off and --save_graphics off @@ -593,7 +595,7 @@ void init_draw_coords(float width_val) { draw_state->draw_rr_node[inode].node_highlighted = false; } } - draw_coords->tile_width = width_val; + draw_coords->tile_width = clb_width; draw_coords->pin_size = 0.3; for (const auto& type : device_ctx.physical_tile_types) { auto num_pins = type.num_pins; @@ -636,7 +638,8 @@ void init_draw_coords(float width_val) { {(1. + VISIBLE_MARGIN) * draw_width, (1. 
+ VISIBLE_MARGIN) * draw_height}); #else - (void)width_val; + (void)clb_width; + (void)blk_loc_registry; #endif /* NO_GRAPHICS */ } @@ -717,10 +720,7 @@ void act_on_key_press(ezgl::application* app, GdkEventKey* /*event*/, char* key_ } void act_on_mouse_press(ezgl::application* app, GdkEventButton* event, double x, double y) { - // std::cout << "User clicked the "; - if (event->button == 1) { - // std::cout << "left "; if (window_mode) { //click on any two points to form new window rectangle bound @@ -780,12 +780,6 @@ void act_on_mouse_press(ezgl::application* app, GdkEventButton* event, double x, highlight_blocks(x, y); } } - // else if (event->button == 2) - // std::cout << "middle "; - // else if (event->button == 3) - // std::cout << "right "; - - // std::cout << "mouse button at coordinates (" << x << "," << y << ") " << std::endl; } void act_on_mouse_move(ezgl::application* app, GdkEventButton* /* event */, double x, double y) { @@ -1006,14 +1000,14 @@ static void highlight_blocks(double x, double y) { char msg[vtr::bufsize]; ClusterBlockId clb_index = get_cluster_block_id_from_xy_loc(x, y); - if (clb_index == EMPTY_BLOCK_ID || clb_index == ClusterBlockId::INVALID()) { + if (clb_index == ClusterBlockId::INVALID()) { return; /* Nothing was found on any layer*/ } auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); - VTR_ASSERT(clb_index != EMPTY_BLOCK_ID); + VTR_ASSERT(clb_index != ClusterBlockId::INVALID()); ezgl::rectangle clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, cluster_ctx.clb_nlist.block_type(clb_index)); // note: this will clear the selected sub-block if show_blk_internal is 0, @@ -1033,8 +1027,8 @@ static void highlight_blocks(double x, double y) { clb_index); sprintf(msg, "Block #%zu (%s) at (%d, %d) selected.", size_t(clb_index), cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - place_ctx.block_locs[clb_index].loc.x, - place_ctx.block_locs[clb_index].loc.y); + block_locs[clb_index].loc.x, + block_locs[clb_index].loc.y); } //If manual moves is activated, then user can select block from the grid. 
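A recurring pattern in the hunks above and below: the two ad-hoc sentinels EMPTY_BLOCK_ID (-1) and INVALID_BLOCK_ID (-2) collapse into ClusterBlockId::INVALID(), and occupancy tests use the id's boolean conversion. A sketch of the idiom, assuming ClusterBlockId follows the usual vtr::StrongId semantics (an invalid id converts to false):

```cpp
// Hedged sketch of the sentinel cleanup applied throughout the drawing code.
// Body of the subtile scan used above:
ClusterBlockId bnum = grid_blocks.block_at_location({i, j, k, layer_num});
if (!bnum) {
    continue; // was: bnum == EMPTY_BLOCK_ID || bnum == INVALID_BLOCK_ID
}
// past this point bnum names a real block: names, types, and bboxes are safe to query
```

One invalid value instead of two also removes the question of which sentinel a given lookup was supposed to return.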
@@ -1047,16 +1041,15 @@ static void highlight_blocks(double x, double y) { application.update_message(msg); application.refresh_drawing(); - return; } ClusterBlockId get_cluster_block_id_from_xy_loc(double x, double y) { t_draw_coords* draw_coords = get_draw_coords_vars(); t_draw_state* draw_state = get_draw_state_vars(); - ClusterBlockId clb_index = EMPTY_BLOCK_ID; + auto clb_index = ClusterBlockId::INVALID(); auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); /// determine block /// ezgl::rectangle clb_bbox; @@ -1079,21 +1072,21 @@ ClusterBlockId get_cluster_block_id_from_xy_loc(double x, double y) { // iterate over sub_blocks const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); for (int k = 0; k < type->capacity; ++k) { - clb_index = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); - if (clb_index != EMPTY_BLOCK_ID) { + clb_index = grid_blocks.block_at_location({i, j, k, layer_num}); + if (clb_index) { clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, cluster_ctx.clb_nlist.block_type(clb_index)); if (clb_bbox.contains({x, y})) { return clb_index; // we've found the clb } else { - clb_index = EMPTY_BLOCK_ID; + clb_index = ClusterBlockId::INVALID(); } } } } } } - // Searched all layers and found no clb at specified location, returning clb_index = EMPTY_BLOCK_ID. + // Searched all layers and found no clb at specified location, returning clb_index = ClusterBlockId::INVALID(). return clb_index; } diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index 1c39f12f49b..2bbd17d077f 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -44,9 +44,15 @@ extern ezgl::application application; void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr timing_info); -//Initializes the drawing locations. //FIXME: Currently broken if no rr-graph is loaded -void init_draw_coords(float clb_width); +/** + * @brief Load the arrays containing the left and bottom coordinates of the clbs. + * forming the FPGA. + * + * @param clb_width The width and height of a drawn clb. + * @param blk_loc_registry A reference to placement location information. + */ +void init_draw_coords(float clb_width, const BlkLocRegistry& blk_loc_registry); /* Sets the static show_graphics and gr_automode variables to the * * desired values. They control if graphics are enabled and, if so, * @@ -132,7 +138,7 @@ bool highlight_loc_with_specific_color(t_pl_loc curr_loc, ezgl::color& loc_color * block types than colour choices. This ensures we support any number of types, although the colours may repeat.*/ ezgl::color get_block_type_color(t_physical_tile_type_ptr type); -/* Lightens a color's luminance [0, 1] by an aboslute 'amount' */ +/* Lightens a color's luminance [0, 1] by an absolute 'amount' */ ezgl::color lighten_color(ezgl::color color, float amount); void toggle_window_mode(GtkWidget* /*widget*/, ezgl::application* /*app*/); @@ -162,7 +168,7 @@ t_draw_layer_display get_element_visibility_and_transparency(int src_layer, int * @param x * @param y * @return returns the ClusterBlockId of the clb at the specified (x,y) location (in world coordinates) as seen by looking downwards from the top of a 3D FPGA. - * Chooses the clb on the top visible layer if there are overlapping blocks. Returns EMPTY_BLOCK_ID (-1) otherwise,if clb is not found on any visible layer. 
+ * Chooses the clb on the top visible layer if there are overlapping blocks. Returns INVALID(-1) otherwise,if clb is not found on any visible layer. */ ClusterBlockId get_cluster_block_id_from_xy_loc(double x, double y); diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index e37de7a84fa..82ad456f70f 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -103,7 +103,7 @@ void drawplace(ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); ClusterBlockId bnum; int num_sub_tiles; @@ -136,12 +136,10 @@ void drawplace(ezgl::renderer* g) { for (int k = 0; k < num_sub_tiles; ++k) { /* Look at the tile at start of large block */ - bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + bnum = grid_blocks.block_at_location({i, j, k, layer_num}); /* Fill background for the clb. Do not fill if "show_blk_internal" * is toggled. */ - if (bnum == INVALID_BLOCK_ID) - continue; //Determine the block color and logical type ezgl::color block_color; @@ -159,8 +157,8 @@ void drawplace(ezgl::renderer* g) { block_color); } // No color specified at this location; use the block color. - if (current_loc_is_highlighted == false) { - if (bnum != EMPTY_BLOCK_ID) { + if (!current_loc_is_highlighted) { + if (bnum) { block_color = draw_state->block_color(bnum); } else { block_color = get_block_type_color(type); @@ -184,15 +182,14 @@ void drawplace(ezgl::renderer* g) { g->set_color(ezgl::BLACK, transparency_factor); - g->set_line_dash( - (EMPTY_BLOCK_ID == bnum) ? ezgl::line_dash::asymmetric_5_3 : ezgl::line_dash::none); + g->set_line_dash((bnum == ClusterBlockId::INVALID()) ? ezgl::line_dash::asymmetric_5_3 : ezgl::line_dash::none); if (draw_state->draw_block_outlines) { g->draw_rectangle(abs_clb_bbox); } if (draw_state->draw_block_text) { /* Draw text if the space has parts of the netlist */ - if (bnum != EMPTY_BLOCK_ID && bnum != INVALID_BLOCK_ID) { + if (bnum) { std::string name = cluster_ctx.clb_nlist.block_name( bnum) + vtr::string_fmt(" (#%zu)", size_t(bnum)); @@ -230,7 +227,7 @@ void drawnets(ezgl::renderer* g) { ClusterBlockId b1, b2; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); float transparency_factor; float NET_ALPHA = draw_state->net_alpha; @@ -244,7 +241,7 @@ void drawnets(ezgl::renderer* g) { /* Draw the net as a star from the source to each sink. Draw from centers of * * blocks (or sub blocks in the case of IOs). 
*/ - for (auto net_id : cluster_ctx.clb_nlist.nets()) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { continue; /* Don't draw */ } @@ -256,7 +253,7 @@ b1 = cluster_ctx.clb_nlist.net_driver_block(net_id); //The layer of the net driver block - driver_block_layer_num = place_ctx.block_locs[b1].loc.layer; + driver_block_layer_num = block_locs[b1].loc.layer; //To only show nets that are connected to currently active layers on the screen if (!draw_state->draw_layer_display[driver_block_layer_num].visible) { @@ -264,11 +261,11 @@ } ezgl::point2d driver_center = draw_coords->get_absolute_clb_bbox(b1, cluster_ctx.clb_nlist.block_type(b1)).center(); - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { b2 = cluster_ctx.clb_nlist.pin_block(pin_id); //the layer of the pin block (net sinks) - sink_block_layer_num = place_ctx.block_locs[b2].loc.layer; + sink_block_layer_num = block_locs[b2].loc.layer; t_draw_layer_display element_visibility = get_element_visibility_and_transparency(driver_block_layer_num, sink_block_layer_num); @@ -277,7 +274,7 @@ } transparency_factor = element_visibility.alpha; - //Take the higher of the 2 transparency values that the user can select from the UI + //Take the higher of the two transparency values that the user can select from the UI // Compare the current cross layer transparency to the overall Net transparency set by the user. g->set_color(draw_state->net_color[net_id], fmin(transparency_factor, draw_state->net_color[net_id].alpha * NET_ALPHA)); @@ -793,7 +790,7 @@ bool is_edge_valid_to_draw(RRNodeId current_node, RRNodeId prev_node) { } /* Draws any placement macros (e.g. carry chains, which require specific relative placements - * between some blocks) if the Placement Macros (in the GUI) is seelected. + * between some blocks) if the Placement Macros (in the GUI) is selected. */ void draw_placement_macros(ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); @@ -804,8 +801,9 @@ t_draw_coords* draw_coords = get_draw_coords_vars(); auto& place_ctx = g_vpr_ctx.placement(); - for (size_t imacro = 0; imacro < place_ctx.pl_macros.size(); ++imacro) { - const t_pl_macro* pl_macro = &place_ctx.pl_macros[imacro]; + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + + for (const t_pl_macro& pl_macro : place_ctx.pl_macros) { //TODO: for now we just draw the bounding box of the macro, which is incorrect for non-rectangular macros...
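Several hunks in this area (draw_searchbar, draw_types, and the macro-drawing loop below) replace the block-id overload physical_tile_type(blk), which read the global placement state internally, with a location-taking overload. A before/after sketch (the overload's signature is inferred from these call sites):

```cpp
// Hedged before/after sketch of the physical_tile_type() migration.
//
// Before: the helper resolved the tile via a hidden global read.
//   t_physical_tile_type_ptr type = physical_tile_type(blk);
//
// After: the caller picks a BlkLocRegistry, resolves the location, passes it in.
t_pl_loc block_loc = block_locs[blk].loc;
t_physical_tile_type_ptr type = physical_tile_type(block_loc);
```

Making the location an explicit argument is what lets the same helper serve both an in-progress placement (local registry) and the final placement (global registry).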
int xlow = std::numeric_limits::max(); @@ -815,24 +813,23 @@ void draw_placement_macros(ezgl::renderer* g) { int x_root = OPEN; int y_root = OPEN; - for (size_t imember = 0; imember < pl_macro->members.size(); - ++imember) { - const t_pl_macro_member* member = &pl_macro->members[imember]; + for (size_t imember = 0; imember < pl_macro.members.size(); ++imember) { + const t_pl_macro_member& member = pl_macro.members[imember]; - ClusterBlockId blk = member->blk_index; + ClusterBlockId blk = member.blk_index; if (imember == 0) { - x_root = place_ctx.block_locs[blk].loc.x; - y_root = place_ctx.block_locs[blk].loc.y; + x_root = block_locs[blk].loc.x; + y_root = block_locs[blk].loc.y; } - int x = x_root + member->offset.x; - int y = y_root + member->offset.y; + int x = x_root + member.offset.x; + int y = y_root + member.offset.y; xlow = std::min(xlow, x); ylow = std::min(ylow, y); - xhigh = std::max(xhigh, x + physical_tile_type(blk)->width); - yhigh = std::max(yhigh, y + physical_tile_type(blk)->height); + xhigh = std::max(xhigh, x + physical_tile_type(block_locs[blk].loc)->width); + yhigh = std::max(yhigh, y + physical_tile_type(block_locs[blk].loc)->height); } double draw_xlow = draw_coords->tile_x[xlow]; @@ -1187,13 +1184,13 @@ void draw_crit_path_elements(const std::vector& paths, const } int get_timing_path_node_layer_num(tatum::NodeId node) { - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); auto& atom_ctx = g_vpr_ctx.atom(); AtomPinId atom_pin = atom_ctx.lookup.tnode_atom_pin(node); AtomBlockId atom_block = atom_ctx.nlist.pin_block(atom_pin); ClusterBlockId clb_block = atom_ctx.lookup.atom_clb(atom_block); - return place_ctx.block_locs[clb_block].loc.layer; + return block_locs[clb_block].loc.layer; } bool is_flyline_valid_to_draw(int src_layer, int sink_layer) { @@ -1420,6 +1417,7 @@ void draw_block_pin_util() { auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); std::map total_input_pins; std::map total_output_pins; @@ -1434,9 +1432,9 @@ void draw_block_pin_util() { auto blks = cluster_ctx.clb_nlist.blocks(); vtr::vector pin_util(blks.size()); - for (auto blk : blks) { - auto type = physical_tile_type(blk); - + for (ClusterBlockId blk : blks) { + t_pl_loc block_loc = block_locs[blk].loc; + auto type = physical_tile_type(block_loc); if (draw_state->show_blk_pin_util == DRAW_BLOCK_PIN_UTIL_TOTAL) { pin_util[blk] = cluster_ctx.clb_nlist.block_pins(blk).size() / float(total_input_pins[type] + total_output_pins[type]); diff --git a/vpr/src/draw/draw_global.cpp b/vpr/src/draw/draw_global.cpp index 391b659bbcf..936b0eeb4a2 100644 --- a/vpr/src/draw/draw_global.cpp +++ b/vpr/src/draw/draw_global.cpp @@ -26,6 +26,13 @@ static t_draw_state draw_state; */ static t_draw_coords draw_coords; +/** + * @brief Stores a reference to a PlaceLocVars to be used in the graphics code. 
+ * @details This reference lets us pass in a currently-being-optimized placement state, + * rather than using the global placement state in the placement context, which is valid only once placement is done + */ +static std::optional<std::reference_wrapper<const BlkLocRegistry>> blk_loc_registry_ref; + /*********************** Accessor Subroutines Definition ********************/ /* This accessor function returns pointer to the global variable @@ -40,4 +47,12 @@ t_draw_state* get_draw_state_vars() { return &draw_state; } +void set_graphics_blk_loc_registry_ref(const BlkLocRegistry& blk_loc_registry) { + blk_loc_registry_ref = std::ref(blk_loc_registry); +} + +const BlkLocRegistry& get_graphics_blk_loc_registry_ref() { + return blk_loc_registry_ref->get(); +} + #endif // NO_GRAPHICS diff --git a/vpr/src/draw/draw_global.h b/vpr/src/draw/draw_global.h index f1eec3967f5..9ba5d734020 100644 --- a/vpr/src/draw/draw_global.h +++ b/vpr/src/draw/draw_global.h @@ -27,6 +27,24 @@ t_draw_coords* get_draw_coords_vars(); t_draw_state* get_draw_state_vars(); +/** + * @brief Set the reference to placement location variables. + * + * During the placement stage, this reference should point to a local object + * in the placement stage because the placement stage does not change the + * global state in place_ctx until the end of placement. After the placement is + * done, the reference should point to the global state stored in place_ctx. + * + * @param blk_loc_registry The PlaceLocVars that the reference will point to. + */ +void set_graphics_blk_loc_registry_ref(const BlkLocRegistry& blk_loc_registry); + +/** + * @brief Returns the reference to placement block location variables. + * @return A const reference to placement block location variables. + */ +const BlkLocRegistry& get_graphics_blk_loc_registry_ref(); + #endif // NO_GRAPHICS #endif diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index fd11b0a2f43..f3457c45992 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -110,14 +110,17 @@ void draw_highlight_blocks_color(t_logical_block_type_ptr type, t_draw_state* draw_state = get_draw_state_vars(); auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); for (k = 0; k < type->pb_type->num_pins; k++) { /* Each pin on a CLB */ ClusterNetId net_id = cluster_ctx.clb_nlist.block_net(blk_id, k); - if (net_id == ClusterNetId::INVALID()) + if (net_id == ClusterNetId::INVALID()) { continue; + } - auto physical_tile = physical_tile_type(blk_id); + t_pl_loc block_loc = block_locs[blk_id].loc; + auto physical_tile = physical_tile_type(block_loc); int physical_pin = get_physical_pin(physical_tile, type, k); auto class_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); diff --git a/vpr/src/draw/draw_types.cpp b/vpr/src/draw/draw_types.cpp index bd63798d398..d2ed9e47c3c 100644 --- a/vpr/src/draw/draw_types.cpp +++ b/vpr/src/draw/draw_types.cpp @@ -13,13 +13,15 @@ *******************************************/ ezgl::color t_draw_state::block_color(ClusterBlockId blk) const { if (use_default_block_color_[blk]) { - t_physical_tile_type_ptr tile_type = nullptr; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - if (place_ctx.block_locs.empty()) { //No placement, pick best match + const auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); + + t_physical_tile_type_ptr tile_type = nullptr; + if (block_locs.empty()) { //No placement, pick best match tile_type =
pick_physical_type(cluster_ctx.clb_nlist.block_type(blk)); } else { // Have placement, select physical tile implementing blk - tile_type = physical_tile_type(blk); + t_pl_loc block_loc = block_locs[blk].loc; + tile_type = physical_tile_type(block_loc); } VTR_ASSERT(tile_type != nullptr); return get_block_type_color(tile_type); @@ -85,12 +87,13 @@ float t_draw_coords::get_tile_height() { } ezgl::rectangle t_draw_coords::get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode) { - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); auto& cluster_ctx = g_vpr_ctx.clustering(); - return get_pb_bbox(place_ctx.block_locs[clb_index].loc.layer, - place_ctx.block_locs[clb_index].loc.x, - place_ctx.block_locs[clb_index].loc.y, - place_ctx.block_locs[clb_index].loc.sub_tile, + + return get_pb_bbox(block_locs[clb_index].loc.layer, + block_locs[clb_index].loc.x, + block_locs[clb_index].loc.y, + block_locs[clb_index].loc.sub_tile, cluster_ctx.clb_nlist.block_type(clb_index), pb_gnode); } @@ -149,9 +152,9 @@ ezgl::rectangle t_draw_coords::get_absolute_pb_bbox(const ClusterBlockId clb_ind } ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(const ClusterBlockId clb_index, const t_logical_block_type_ptr block_type) { - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); - t_pl_loc loc = place_ctx.block_locs[clb_index].loc; + t_pl_loc loc = block_locs[clb_index].loc; return get_pb_bbox(loc.layer, loc.x, loc.y, loc.sub_tile, block_type); } diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index 4750dc37dfe..3779af4cf79 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -397,7 +397,7 @@ struct t_draw_coords { private: float tile_width; - friend void init_draw_coords(float); + friend void init_draw_coords(float width_val, const BlkLocRegistry& blk_loc_registry); }; #endif // NO_GRAPHICS diff --git a/vpr/src/draw/intra_logic_block.cpp b/vpr/src/draw/intra_logic_block.cpp index 214ba01fe23..855f2262e63 100644 --- a/vpr/src/draw/intra_logic_block.cpp +++ b/vpr/src/draw/intra_logic_block.cpp @@ -152,7 +152,7 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { } auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); int total_layer_num = device_ctx.grid.get_num_layers(); @@ -175,14 +175,16 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { int num_sub_tiles = type->capacity; for (int k = 0; k < num_sub_tiles; ++k) { /* Don't draw if block is empty. 
*/ - if (place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) == EMPTY_BLOCK_ID || place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) == INVALID_BLOCK_ID) + if (!grid_blocks.block_at_location({i, j, k, layer_num})) { continue; + } /* Get block ID */ - ClusterBlockId bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + ClusterBlockId bnum = grid_blocks.block_at_location({i, j, k, layer_num}); /* Safety check, that physical blocks exists in the CLB */ - if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) + if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) { continue; + } draw_internal_pb(bnum, cluster_ctx.clb_nlist.block_pb(bnum), ezgl::rectangle({0, 0}, 0, 0), cluster_ctx.clb_nlist.block_type(bnum), g); } } @@ -260,7 +262,6 @@ static void draw_internal_load_coords(int type_descrip_index, t_pb_graph_node* p } } } - return; } /* Helper function which computes bounding box values for a sub-block. The coordinates @@ -272,7 +273,7 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node float sub_tile_x, sub_tile_y; float child_width, child_height; auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.placement(); + const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); // get the bbox for this pb type ezgl::rectangle& pb_bbox = get_draw_coords_vars()->blk_info.at(type_descrip_index).get_pb_bbox_ref(*pb_graph_node); @@ -290,7 +291,7 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node int capacity = device_ctx.physical_tile_types[type_descrip_index].capacity; // TODO: this is a hack - should be fixed for the layer_num const auto& type = device_ctx.grid.get_physical_type({1, 0, 0}); - if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && place_ctx.grid_blocks.get_usage({1, 0, 0}) != 0 + if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && grid_blocks.get_usage({1, 0, 0}) != 0 && type_descrip_index == type->index) { // that should test for io blocks, and setting capacity_divisor > 1 // will squish every thing down @@ -329,8 +330,6 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node *blk_width = child_width; *blk_height = child_height; - - return; } # ifndef NO_GRAPHICS @@ -342,7 +341,7 @@ static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezg t_draw_coords* draw_coords = get_draw_coords_vars(); t_draw_state* draw_state = get_draw_state_vars(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); t_selected_sub_block_info& sel_sub_info = get_selected_sub_block_info(); @@ -350,7 +349,7 @@ static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezg ezgl::rectangle temp = draw_coords->get_pb_bbox(clb_index, *pb->pb_graph_node); ezgl::rectangle abs_bbox = temp + parent_bbox.bottom_left(); - int layer_num = place_ctx.block_locs[clb_index].loc.layer; + int layer_num = block_locs[clb_index].loc.layer; int transparency_factor = draw_state->draw_layer_display[layer_num].alpha; // if we've gone too far, don't draw anything @@ -560,7 +559,7 @@ void draw_logical_connections(ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); auto& atom_ctx = g_vpr_ctx.atom(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); g->set_line_dash(ezgl::line_dash::none); @@ -578,7 +577,7 @@ void 
draw_logical_connections(ezgl::renderer* g) { AtomBlockId src_blk_id = atom_ctx.nlist.pin_block(driver_pin_id); ClusterBlockId src_clb = atom_ctx.lookup.atom_clb(src_blk_id); - int src_layer_num = place_ctx.block_locs[src_clb].loc.layer; + int src_layer_num = block_locs[src_clb].loc.layer; //To only show primitive nets that are connected to currently active layers on the screen if (!draw_state->draw_layer_display[src_layer_num].visible) { continue; /* Don't Draw */ @@ -593,7 +592,7 @@ void draw_logical_connections(ezgl::renderer* g) { AtomBlockId sink_blk_id = atom_ctx.nlist.pin_block(sink_pin_id); const t_pb_graph_node* sink_pb_gnode = atom_ctx.lookup.atom_pb_graph_node(sink_blk_id); ClusterBlockId sink_clb = atom_ctx.lookup.atom_clb(sink_blk_id); - int sink_layer_num = place_ctx.block_locs[sink_clb].loc.layer; + int sink_layer_num = block_locs[sink_clb].loc.layer; t_draw_layer_display element_visibility = get_element_visibility_and_transparency(src_layer_num, sink_layer_num); @@ -653,7 +652,7 @@ void find_pin_index_at_model_scope(const AtomPinId pin_id, const AtomBlockId blk int atom_port_index = atom_ctx.nlist.pin_port_bit(pin_id); //The index of this pin in the model is the pins counted so-far - //(i.e. accross previous ports) plus the index in the port + //(i.e. across previous ports) plus the index in the port *pin_index = pin_cnt + atom_port_index; } diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 3d907550396..00fb05e3ab2 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -5,7 +5,11 @@ * @brief Contains the function definitions needed for manual moves feature. * * Includes the graphics/gtk function for manual moves. The Manual Move Generator class is defined manual_move_generator.h/cpp. - * The manual move feature allows the user to select a move by choosing the block to move, x position, y position, subtile position. If the placer accepts the move, the user can accept or reject the move with respect to the delta cost, delta timing and delta bounding box cost displayed on the UI. The manual move feature interacts with placement through the ManualMoveGenerator class in the manual_move_generator.cpp/h files and in the place.cpp file by checking if the manual move toggle button in the UI is active or not, and calls the function needed. + * The manual move feature allows the user to select a move by choosing the block to move, x position, y position, subtile position. + * If the placer accepts the move, the user can accept or reject the move with respect to the delta cost, + * delta timing and delta bounding box cost displayed on the UI. The manual move feature interacts with placement through + * the ManualMoveGenerator class in the manual_move_generator.cpp/h files and in the place.cpp file by checking + * if the manual move toggle button in the UI is active or not, and calls the function needed. 
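The drawing code above reaches block locations through get_graphics_blk_loc_registry_ref(), whose backing store (added in draw_global.cpp earlier in this diff) is an optional reference wrapper. A self-contained sketch of that pattern, with shortened names and a stand-in type:

```cpp
#include <functional>
#include <optional>

struct BlkLocRegistry {}; // stand-in for the real class, for illustration only

// std::reference_wrapper has no empty state and cannot be default-constructed,
// so std::optional supplies the "not set yet" state; std::cref rebinds it later.
static std::optional<std::reference_wrapper<const BlkLocRegistry>> ref;

void set_ref(const BlkLocRegistry& r) { ref = std::cref(r); }

const BlkLocRegistry& get_ref() {
    // Assumes set_ref() already ran (the patch routes this through
    // init_draw_coords()); dereferencing an empty optional is undefined behavior.
    return ref->get();
}
```

A raw pointer would work too; the wrapper just encodes that, once set, null is never a legal value.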
*/ #include "manual_moves.h" @@ -144,8 +148,8 @@ void calculate_cost_callback(GtkWidget* /*widget*/, GtkWidget* grid) { bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); + const auto& grid_blocks = get_graphics_blk_loc_registry_ref().grid_blocks(); //if the block is not found if ((!cluster_ctx.clb_nlist.valid_block_id(ClusterBlockId(block_id)))) { @@ -169,16 +173,16 @@ } //If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks.block_at_location(to); - if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { - if (place_ctx.block_locs[b_to].is_fixed) { + ClusterBlockId b_to = grid_blocks.block_at_location(to); + if (b_to) { + if (get_graphics_blk_loc_registry_ref().block_locs()[b_to].is_fixed) { invalid_breakpoint_entry_window("Block is fixed"); return false; } } //If the block requested is already in that location. - t_pl_loc current_block_loc = place_ctx.block_locs[block_id].loc; + t_pl_loc current_block_loc = get_graphics_blk_loc_registry_ref().block_locs()[block_id].loc; if (to.x == current_block_loc.x && to.y == current_block_loc.y && to.sub_tile == current_block_loc.sub_tile) { invalid_breakpoint_entry_window("The block is currently in this location"); return false; @@ -301,7 +305,12 @@ e_move_result pl_do_manual_move(double d_cost, double d_timing, double d_boundin return move_outcome; } -e_create_move manual_move_display_and_propose(ManualMoveGenerator& manual_move_generator, t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +e_create_move manual_move_display_and_propose(ManualMoveGenerator& manual_move_generator, + t_pl_blocks_to_be_moved& blocks_affected, + e_move_type& move_type, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) { draw_manual_moves_window(""); update_screen(ScreenUpdatePriority::MAJOR, " ", PLACEMENT, nullptr); move_type = e_move_type::MANUAL_MOVE; diff --git a/vpr/src/draw/manual_moves.h b/vpr/src/draw/manual_moves.h index 94d981a1e35..7f78ff0e876 100644 --- a/vpr/src/draw/manual_moves.h +++ b/vpr/src/draw/manual_moves.h @@ -149,11 +149,18 @@ bool string_is_a_number(const std::string& block_id); * @param d_bounding_box: Delta bounding box for cost summary dialog function. * @param move_outcome: Move result from placement for cost summary dialog function. * - * Helper function used in place.cpp. The ManualMovesState variable are updated and the manual_move_cost_summary_dialog is called to display the cost members to the user in the UI and waits for the user to either ACCPET/REJECT the manual move. + * Helper function used in place.cpp. The ManualMovesState variables are updated and + * the manual_move_cost_summary_dialog is called to display the cost members to the user + * in the UI and waits for the user to either ACCEPT/REJECT the manual move.
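One readability nit in is_manual_move_legal above: the get_graphics_blk_loc_registry_ref().block_locs() chain is evaluated twice. A possible tightening (a suggestion on top of this patch, not part of it):

```cpp
// Hedged sketch: hoist the repeated accessor chain into one named local.
const auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs();

ClusterBlockId b_to = grid_blocks.block_at_location(to);
if (b_to && block_locs[b_to].is_fixed) {
    invalid_breakpoint_entry_window("Block is fixed");
    return false;
}

t_pl_loc current_block_loc = block_locs[block_id].loc;
```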
*/ e_move_result pl_do_manual_move(double d_cost, double d_timing, double d_bounding_box, e_move_result& move_outcome); -e_create_move manual_move_display_and_propose(ManualMoveGenerator& manual_move_generator, t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities); +e_create_move manual_move_display_and_propose(ManualMoveGenerator& manual_move_generator, + t_pl_blocks_to_be_moved& blocks_affected, + e_move_type& move_type, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities); #endif /*NO_GRAPHICS*/ diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index bd7e160d4ad..2842577b2fa 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -283,12 +283,12 @@ void auto_zoom_rr_node(RRNodeId rr_node_id) { void highlight_cluster_block(ClusterBlockId clb_index) { char msg[vtr::bufsize]; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + const auto& block_locs = get_graphics_blk_loc_registry_ref().block_locs(); /// determine block /// ezgl::rectangle clb_bbox; - VTR_ASSERT(clb_index != EMPTY_BLOCK_ID); + VTR_ASSERT(clb_index != ClusterBlockId::INVALID()); ezgl::point2d point_in_clb = clb_bbox.bottom_left(); highlight_sub_block(point_in_clb, clb_index, cluster_ctx.clb_nlist.block_pb(clb_index)); @@ -300,7 +300,9 @@ void highlight_cluster_block(ClusterBlockId clb_index) { } else { /* Highlight block and fan-in/fan-outs. */ draw_highlight_blocks_color(cluster_ctx.clb_nlist.block_type(clb_index), clb_index); - sprintf(msg, "Block #%zu (%s) at (%d, %d) selected.", size_t(clb_index), cluster_ctx.clb_nlist.block_name(clb_index).c_str(), place_ctx.block_locs[clb_index].loc.x, place_ctx.block_locs[clb_index].loc.y); + sprintf(msg, "Block #%zu (%s) at (%d, %d) selected.", + size_t(clb_index), cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + block_locs[clb_index].loc.x, block_locs[clb_index].loc.y); } application.update_message(msg); diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.cpp b/vpr/src/pack/post_routing_pb_pin_fixup.cpp index ceb9263e12b..2126c0b7b1a 100644 --- a/vpr/src/pack/post_routing_pb_pin_fixup.cpp +++ b/vpr/src/pack/post_routing_pb_pin_fixup.cpp @@ -1090,7 +1090,7 @@ void sync_netlists_to_routing(const Netlist<>& net_list, device_ctx, clustering_ctx, rr_node_nets, - placement_ctx.block_locs[clb_blk_id].loc, + placement_ctx.block_locs()[clb_blk_id].loc, clb_blk_id, num_mismatches, verbose, diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp index 1e23ec468b8..46eb04955a5 100644 --- a/vpr/src/pack/re_cluster_util.cpp +++ b/vpr/src/pack/re_cluster_util.cpp @@ -82,10 +82,10 @@ void commit_mol_move(ClusterBlockId old_clb, //place the new cluster if this function called during placement (after the initial placement is done) if (!during_packing && new_clb_created) { int imacro; - g_vpr_ctx.mutable_placement().block_locs.resize(g_vpr_ctx.placement().block_locs.size() + 1); + g_vpr_ctx.mutable_placement().mutable_block_locs().resize(g_vpr_ctx.placement().block_locs().size() + 1); get_imacro_from_iblk(&imacro, old_clb, g_vpr_ctx.placement().pl_macros); set_imacro_for_iblk(&imacro, new_clb); - place_one_block(new_clb, device_ctx.pad_loc_type, nullptr, nullptr); + place_one_block(new_clb, device_ctx.pad_loc_type, nullptr, nullptr, g_vpr_ctx.mutable_placement().mutable_blk_loc_registry()); } } diff --git a/vpr/src/place/RL_agent_util.cpp 
b/vpr/src/place/RL_agent_util.cpp index aa149b5d4e4..78984f49928 100644 --- a/vpr/src/place/RL_agent_util.cpp +++ b/vpr/src/place/RL_agent_util.cpp @@ -2,11 +2,13 @@ #include "static_move_generator.h" #include "manual_move_generator.h" -void create_move_generators(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - const t_placer_opts& placer_opts, - int move_lim, - float noc_attraction_weight) { +std::pair, std::unique_ptr> create_move_generators(PlacerState& placer_state, + const t_placer_opts& placer_opts, + int move_lim, + double noc_attraction_weight) { + + std::pair, std::unique_ptr> move_generators; + if (!placer_opts.RL_agent_placement) { // RL agent is disabled auto move_types = placer_opts.place_static_move_prob; move_types.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES, 0.0f); @@ -18,8 +20,8 @@ void create_move_generators(std::unique_ptr& move_generator, move_name.c_str(), placer_opts.place_static_move_prob[move_type]); } - move_generator = std::make_unique(placer_opts.place_static_move_prob); - move_generator2 = std::make_unique(placer_opts.place_static_move_prob); + move_generators.first = std::make_unique(placer_state, placer_opts.place_static_move_prob); + move_generators.second = std::make_unique(placer_state, placer_opts.place_static_move_prob); } else { //RL based placement /* For the non timing driven placement: the agent has a single state * * - Available moves are (Uniform / Median / Centroid) * @@ -71,17 +73,19 @@ void create_move_generators(std::unique_ptr& move_generator, placer_opts.place_agent_epsilon); } karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim); - move_generator = std::make_unique(karmed_bandit_agent1, - noc_attraction_weight, - placer_opts.place_high_fanout_net); + move_generators.first = std::make_unique(placer_state, + karmed_bandit_agent1, + noc_attraction_weight, + placer_opts.place_high_fanout_net); //agent's 2nd state karmed_bandit_agent2 = std::make_unique(second_state_avail_moves, e_agent_space::MOVE_TYPE, placer_opts.place_agent_epsilon); karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim); - move_generator2 = std::make_unique(karmed_bandit_agent2, - noc_attraction_weight, - placer_opts.place_high_fanout_net); + move_generators.second = std::make_unique(placer_state, + karmed_bandit_agent2, + noc_attraction_weight, + placer_opts.place_high_fanout_net); } else { std::unique_ptr karmed_bandit_agent1, karmed_bandit_agent2; //agent's 1st state @@ -95,18 +99,22 @@ void create_move_generators(std::unique_ptr& move_generator, e_agent_space::MOVE_TYPE); } karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim); - move_generator = std::make_unique(karmed_bandit_agent1, - noc_attraction_weight, - placer_opts.place_high_fanout_net); + move_generators.first = std::make_unique(placer_state, + karmed_bandit_agent1, + noc_attraction_weight, + placer_opts.place_high_fanout_net); //agent's 2nd state karmed_bandit_agent2 = std::make_unique(second_state_avail_moves, e_agent_space::MOVE_TYPE); karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim); - move_generator2 = std::make_unique(karmed_bandit_agent2, - noc_attraction_weight, - placer_opts.place_high_fanout_net); + move_generators.second = std::make_unique(placer_state, + karmed_bandit_agent2, + noc_attraction_weight, + placer_opts.place_high_fanout_net); } } + + return move_generators; } void assign_current_move_generator(std::unique_ptr& move_generator, diff --git a/vpr/src/place/RL_agent_util.h 
b/vpr/src/place/RL_agent_util.h index b50e2caed42..afe8bf3b6cb 100644 --- a/vpr/src/place/RL_agent_util.h +++ b/vpr/src/place/RL_agent_util.h @@ -14,14 +14,21 @@ enum class e_agent_state { * * This function creates 2 move generators to be used by the annealer. The type of the move generators created here depends on the * type selected in placer_opts. - * It returns a unique pointer for each move generator in move_generator and move_generator2 - * move_lim: represents the num of moves per temp. + * + * @param placer_state Move generators store a reference to the placer context to avoid global state access. + * @param placer_opts Contains information about the placement algorithm and its parameters. + * @param move_lim represents the num of moves per temp. + * @param noc_attraction_weight The attraction weight by which the NoC-biased centroid move adjust the computed location + * towards reachable NoC routers from the moving block. + * + * @return Two unique pointers referring to move generators. These move generators are supposed to be used + * in the first and second states of the agent. + * */ -void create_move_generators(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - const t_placer_opts& placer_opts, - int move_lim, - float noc_attraction_weight); +std::pair, std::unique_ptr> create_move_generators(PlacerState& placer_state, + const t_placer_opts& placer_opts, + int move_lim, + double noc_attraction_weight); /** * @brief copy one of the available move_generators to be the current move_generator that would be used in the placement based on the placer_options and the agent state @@ -34,7 +41,7 @@ void assign_current_move_generator(std::unique_ptr& move_generato std::unique_ptr& current_move_generator); /** - * @ brief move the updated current_move_generator to its original move_Generator structure based on he placer_options and the agent state + * @brief move the updated current_move_generator to its original move_Generator structure based on the placer_options and the agent state */ void update_move_generator(std::unique_ptr& move_generator, std::unique_ptr& move_generator2, diff --git a/vpr/src/place/analytic_placer.cpp b/vpr/src/place/analytic_placer.cpp index 4752756bba3..4bb632fbdb7 100644 --- a/vpr/src/place/analytic_placer.cpp +++ b/vpr/src/place/analytic_placer.cpp @@ -128,7 +128,9 @@ constexpr int HEAP_STALLED_ITERATIONS_STOP = 15; * Currently only initializing AP configuration parameters * Placement & device info is accessed via g_vpr_ctx */ -AnalyticPlacer::AnalyticPlacer() { + +AnalyticPlacer::AnalyticPlacer(BlkLocRegistry& blk_loc_registry) + : blk_loc_registry_ref_(blk_loc_registry) { //Eigen::initParallel(); // TODO: PlacerHeapCfg should be externally configured & supplied @@ -301,11 +303,11 @@ void AnalyticPlacer::build_legal_locations() { // initialize other data members void AnalyticPlacer::init() { const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + auto& init_block_locs = blk_loc_registry_ref_.block_locs(); for (auto blk_id : clb_nlist.blocks()) { blk_locs.insert(blk_id, BlockLocation{}); - blk_locs[blk_id].loc = place_ctx.block_locs[blk_id].loc; // transfer of initial placement + blk_locs[blk_id].loc = init_block_locs[blk_id].loc; // transfer of initial placement row_num.insert(blk_id, DONT_SOLVE); // no blocks are moved by default, until they are setup in setup_solve_blks() } @@ -320,7 +322,7 @@ void AnalyticPlacer::init() { }; for (auto blk_id : 
clb_nlist.blocks()) { - if (!place_ctx.block_locs[blk_id].is_fixed && has_connections(blk_id)) + if (!init_block_locs[blk_id].is_fixed && has_connections(blk_id)) // not fixed and has connections // matrix equation is formulated based on connections, so requires at least one connection if (imacro(blk_id) == NO_MACRO || macro_head(blk_id) == blk_id) { @@ -412,7 +414,7 @@ void AnalyticPlacer::setup_solve_blks(t_logical_block_type_ptr blkTypes) { void AnalyticPlacer::update_macros() { for (auto& macro : g_vpr_ctx.mutable_placement().pl_macros) { ClusterBlockId head_id = macro.members[0].blk_index; - bool mac_can_be_placed = macro_can_be_placed(macro, blk_locs[head_id].loc, true); + bool mac_can_be_placed = macro_can_be_placed(macro, blk_locs[head_id].loc, true, blk_loc_registry_ref_); //if macro can not be placed in this head pos, change the head pos if (!mac_can_be_placed) { @@ -421,7 +423,7 @@ void AnalyticPlacer::update_macros() { } //macro should be placed successfully after changing the head position - VTR_ASSERT(macro_can_be_placed(macro, blk_locs[head_id].loc, true)); + VTR_ASSERT(macro_can_be_placed(macro, blk_locs[head_id].loc, true, blk_loc_registry_ref_)); //update other member's location based on head pos for (auto member = ++macro.members.begin(); member != macro.members.end(); ++member) { @@ -741,7 +743,7 @@ std::string AnalyticPlacer::print_overlap(vtr::Matrix& overlap, FILE* fp) { void AnalyticPlacer::print_place(const char* place_file) { const DeviceContext& device_ctx = g_vpr_ctx.device(); const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + auto& block_locs = blk_loc_registry_ref_.block_locs(); FILE* fp; @@ -772,7 +774,7 @@ void AnalyticPlacer::print_place(const char* place_file) { "------------", "--------"); - if (!place_ctx.block_locs.empty()) { //Only if placement exists + if (!block_locs.empty()) { //Only if placement exists for (auto blk_id : clb_nlist.blocks()) { fprintf(fp, "%-25s %-18s %-12s %-25s %-5d %-5d %-10d #%-13zu %-8s\n", clb_nlist.block_name(blk_id).c_str(), @@ -783,7 +785,7 @@ void AnalyticPlacer::print_place(const char* place_file) { blk_locs[blk_id].loc.y, blk_locs[blk_id].loc.sub_tile, size_t(blk_id), - (place_ctx.block_locs[blk_id].is_fixed ? "true" : "false")); + (block_locs[blk_id].is_fixed ? "true" : "false")); } fprintf(fp, "\ntotal_HPWL: %d\n", total_hpwl()); vtr::Matrix overlap; diff --git a/vpr/src/place/analytic_placer.h b/vpr/src/place/analytic_placer.h index e31775adf7e..a1f4ff8dcbe 100644 --- a/vpr/src/place/analytic_placer.h +++ b/vpr/src/place/analytic_placer.h @@ -122,7 +122,8 @@ class AnalyticPlacer { * @brief Constructor of AnalyticPlacer, currently initializes AnalyticPlacerCfg for the analytic placer * To tune these parameters, change directly in constructor */ - AnalyticPlacer(); + AnalyticPlacer() = delete; + explicit AnalyticPlacer(BlkLocRegistry& blk_loc_registry); /* * @brief main function of analytic placement @@ -180,6 +181,9 @@ class AnalyticPlacer { // Lookup from blockID to block location vtr::vector_map blk_locs; + // reference to the placement location variables + BlkLocRegistry& blk_loc_registry_ref_; + /* * The set of blks of different types to be placed by AnalyticPlacement process, * i.e. the free variable blocks. 
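With AnalyticPlacer() deleted, an analytic placer can no longer be created without saying whose locations it manipulates. A construction sketch (which registry to pass is the caller's choice; the global one is only a natural default outside the placement stage):

```cpp
// Hedged sketch: constructing the analytic placer against an explicit registry.
// During placement the caller would pass its local registry instead, since the
// global one is locked by lock_loc_vars() for the duration of the stage.
BlkLocRegistry& registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
AnalyticPlacer placer(registry); // AnalyticPlacer() = delete forces this form
```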
diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp index a1b79b92f7a..508d8cc75da 100644 --- a/vpr/src/place/centroid_move_generator.cpp +++ b/vpr/src/place/centroid_move_generator.cpp @@ -3,6 +3,7 @@ #include "globals.h" #include "directed_moves_util.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" #include @@ -15,12 +16,16 @@ vtr::vector CentroidMoveGenerator::cluster_to_noc_gr std::map CentroidMoveGenerator::noc_router_to_noc_group_; -CentroidMoveGenerator::CentroidMoveGenerator() - : noc_attraction_w_(0.0f) +CentroidMoveGenerator::CentroidMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) + , noc_attraction_w_(0.0f) , noc_attraction_enabled_(false) {} -CentroidMoveGenerator::CentroidMoveGenerator(float noc_attraction_weight, size_t high_fanout_net) - : noc_attraction_w_(noc_attraction_weight) +CentroidMoveGenerator::CentroidMoveGenerator(PlacerState& placer_state, + float noc_attraction_weight, + size_t high_fanout_net) + : MoveGenerator(placer_state) + , noc_attraction_w_(noc_attraction_weight) , noc_attraction_enabled_(true) { VTR_ASSERT(noc_attraction_weight > 0.0 && noc_attraction_weight <= 1.0); @@ -39,12 +44,20 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { + auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& place_move_ctx = placer_state.move(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); + // Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - false, - nullptr, - nullptr); + /*highly_crit_block=*/false, + /*net_from=*/nullptr, + /*pin_from=*/nullptr, + placer_state); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Centroid Move Choose Block %d - rlim %f\n", @@ -57,14 +70,11 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block return e_create_move::ABORT; } - const auto& device_ctx = g_vpr_ctx.device(); - const auto& place_ctx = g_vpr_ctx.placement(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - t_pl_loc from = place_ctx.block_locs[b_from].loc; - auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); + + t_pl_loc from = block_locs[b_from].loc; + t_logical_block_type_ptr cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); + t_physical_tile_type_ptr grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_range_limiters range_limiters{rlim, @@ -74,17 +84,17 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block t_pl_loc to, centroid; /* Calculate the centroid location*/ - calculate_centroid_loc(b_from, false, centroid, nullptr, noc_attraction_enabled_, noc_attraction_w_); + calculate_centroid_loc(b_from, false, centroid, nullptr, noc_attraction_enabled_, noc_attraction_w_, blk_loc_registry); // Centroid location is not necessarily a valid location, and the downstream location expects a valid // layer for the centroid location. 
So if the layer is not valid, we set it to the same layer as from loc. centroid.layer = (centroid.layer < 0) ? from.layer : centroid.layer; /* Find a location near the weighted centroid_loc */ - if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) { + if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -135,7 +145,7 @@ void CentroidMoveGenerator::initialize_noc_groups(size_t high_fanout_net) { const auto router_block_type = cluster_ctx.clb_nlist.block_type(router_blk_ids[0]); // iterate over logical NoC routers and start a BFS - for (auto router_blk_id : router_blk_ids) { + for (ClusterBlockId router_blk_id : router_blk_ids) { if (block_visited[router_blk_id]) { continue; @@ -181,7 +191,7 @@ void CentroidMoveGenerator::initialize_noc_groups(size_t high_fanout_net) { } if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::DRIVER) { - for (auto sink_pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId sink_pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { ClusterBlockId sink_block_id = cluster_ctx.clb_nlist.pin_block(sink_pin_id); if (!block_visited[sink_block_id]) { block_visited[sink_block_id] = true; diff --git a/vpr/src/place/centroid_move_generator.h b/vpr/src/place/centroid_move_generator.h index 2a0b99234c5..fb8176aa1bd 100644 --- a/vpr/src/place/centroid_move_generator.h +++ b/vpr/src/place/centroid_move_generator.h @@ -21,10 +21,13 @@ class CentroidMoveGenerator : public MoveGenerator { public: /** - * The move generator created by calling this constructor only consider + * The move generator created by calling this constructor only considers * netlist connectivity for computing the centroid location. + * + * @param placer_state A mutable reference to the placement state which will + * be stored in this object. */ - CentroidMoveGenerator(); + explicit CentroidMoveGenerator(PlacerState& placer_state); /** * The move generator created by calling this constructor considers both @@ -33,13 +36,17 @@ class CentroidMoveGenerator : public MoveGenerator { * in the graph representing the clustered netlist. When finding connected * components, none of the nets whose fanout is larger than high_fanout_net * are traversed. + * @param placer_state A mutable reference to the placement state which will + * be stored in this object. * @param noc_attraction_weight Specifies how much the computed centroid * is adjusted towards the location of NoC routers in the same NoC group as * the clustered block to be moved. * @param high_fanout_net All nets with a fanout larger than this number are * ignored when forming NoC groups. 
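A usage sketch for the two constructors documented above (editor's illustration; the 0.5f weight and the 64-net fanout threshold are arbitrary example values, not defaults):

```cpp
// Hedged sketch: the two CentroidMoveGenerator flavors described above.
CentroidMoveGenerator conn_only(placer_state);           // connectivity-only centroid
CentroidMoveGenerator noc_aware(placer_state, 0.5f, 64); // NoC-biased centroid
```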
*/ - CentroidMoveGenerator(float noc_attraction_weight, size_t high_fanout_net); + CentroidMoveGenerator(PlacerState& placer_state, + float noc_attraction_weight, + size_t high_fanout_net); /** diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp index 9fbc93a7645..9f6641a732e 100644 --- a/vpr/src/place/critical_uniform_move_generator.cpp +++ b/vpr/src/place/critical_uniform_move_generator.cpp @@ -1,39 +1,51 @@ #include "critical_uniform_move_generator.h" #include "globals.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" -e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { +CriticalUniformMoveGenerator::CriticalUniformMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* /*criticalities*/) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); + ClusterNetId net_from; int pin_from; //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - true, + /*highly_crit_block=*/true, &net_from, - &pin_from); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Critical Uniform Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); + &pin_from, + placer_state); - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Critical Uniform Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { //No movable block found VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tNo movable block found\n"); return e_create_move::ABORT; } - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; to.layer = from.layer; - if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { + if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -42,3 +54,4 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved return create_move; } + diff --git a/vpr/src/place/critical_uniform_move_generator.h b/vpr/src/place/critical_uniform_move_generator.h index a5a08af7c3b..8de342a788e 100644 --- a/vpr/src/place/critical_uniform_move_generator.h +++ b/vpr/src/place/critical_uniform_move_generator.h @@ -15,7 +15,16 @@ * Returns its choices by filling in affected_blocks. 
*/ class CriticalUniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; + public: + CriticalUniformMoveGenerator() = delete; + explicit CriticalUniformMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& /*placer_opts*/, + const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/cut_spreader.cpp b/vpr/src/place/cut_spreader.cpp index 88b0f913346..fed8216795e 100644 --- a/vpr/src/place/cut_spreader.cpp +++ b/vpr/src/place/cut_spreader.cpp @@ -12,6 +12,7 @@ # include "globals.h" # include "vtr_log.h" # include "place_util.h" +# include "grid_block.h" // sentinel for base case in CutSpreader (i.e. only 1 block left in region) constexpr std::pair BASE_CASE = {-2, -2}; @@ -405,7 +406,7 @@ void CutSpreader::expand_regions() { std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { const DeviceContext& device_ctx = g_vpr_ctx.device(); const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; // TODO: CutSpreader is not compatible with 3D FPGA VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); @@ -503,7 +504,7 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { // while left subarea is over-utilized, move logic blocks to the right subarea one at a time while (pivot > 0 && rl.overused(ap->ap_cfg.beta)) { auto& move_blk = cut_blks.at(pivot); - int size = (imacro(move_blk) != NO_MACRO) ? place_ctx.pl_macros[imacro(move_blk)].members.size() : 1; + int size = (imacro(move_blk) != NO_MACRO) ? pl_macros[imacro(move_blk)].members.size() : 1; rl.n_blks -= size; rr.n_blks += size; pivot--; @@ -511,7 +512,7 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { // while right subarea is over-utilized, move logic blocks to the left subarea one at a time while (pivot < int(cut_blks.size()) - 1 && rr.overused(ap->ap_cfg.beta)) { auto& move_blk = cut_blks.at(pivot + 1); - int size = (imacro(move_blk) != NO_MACRO) ? place_ctx.pl_macros[imacro(move_blk)].members.size() : 1; + int size = (imacro(move_blk) != NO_MACRO) ? pl_macros[imacro(move_blk)].members.size() : 1; rl.n_blks += size; rr.n_blks -= size; pivot++; @@ -617,7 +618,7 @@ int CutSpreader::initial_source_cut(SpreaderRegion& r, bool dir, int& clearance_l, int& clearance_r) { - PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; // pivot is the midpoint of cut_blks in terms of total block size (counting macro members) // this ensures the initial partitions have similar number of blocks @@ -625,7 +626,7 @@ int CutSpreader::initial_source_cut(SpreaderRegion& r, int pivot = 0; // midpoint in terms of index of cut_blks for (auto& blk : cut_blks) { // if blk is part of macro (only macro heads in cut_blks, no macro members), add that macro's size - pivot_blks += (imacro(blk) != NO_MACRO) ? place_ctx.pl_macros[imacro(blk)].members.size() : 1; + pivot_blks += (imacro(blk) != NO_MACRO) ? 
pl_macros[imacro(blk)].members.size() : 1; if (pivot_blks >= r.n_blks / 2) break; pivot++; @@ -670,16 +671,16 @@ int CutSpreader::initial_target_cut(SpreaderRegion& r, int& right_blks_n, int& left_tiles_n, int& right_tiles_n) { - PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& pl_macros = g_vpr_ctx.mutable_placement().pl_macros; // To achieve smallest difference in utilization, first move all tiles to right partition left_blks_n = 0, right_blks_n = 0; left_tiles_n = 0, right_tiles_n = r.n_tiles; // count number of blks in each partition, from initial source cut for (int i = 0; i <= init_source_cut; i++) - left_blks_n += (imacro(cut_blks.at(i)) != NO_MACRO) ? place_ctx.pl_macros[imacro(cut_blks.at(i))].members.size() : 1; + left_blks_n += (imacro(cut_blks.at(i)) != NO_MACRO) ? pl_macros[imacro(cut_blks.at(i))].members.size() : 1; for (int i = init_source_cut + 1; i < int(cut_blks.size()); i++) - right_blks_n += (imacro(cut_blks.at(i)) != NO_MACRO) ? place_ctx.pl_macros[imacro(cut_blks.at(i))].members.size() : 1; + right_blks_n += (imacro(cut_blks.at(i)) != NO_MACRO) ? pl_macros[imacro(cut_blks.at(i))].members.size() : 1; int best_tgt_cut = -1; double best_deltaU = std::numeric_limits::max(); @@ -805,14 +806,15 @@ void CutSpreader::linear_spread_subarea(std::vector& cut_blks, */ void CutSpreader::strict_legalize() { auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& block_locs = ap->blk_loc_registry_ref_.block_locs(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; int max_x = g_vpr_ctx.device().grid.width(); int max_y = g_vpr_ctx.device().grid.height(); // clear the location of all blocks in place_ctx for (auto blk : clb_nlist.blocks()) { - if (!place_ctx.block_locs[blk].is_fixed && (ap->row_num[blk] != DONT_SOLVE || (imacro(blk) != NO_MACRO && ap->row_num[macro_head(blk)] != DONT_SOLVE))) { - unbind_tile(place_ctx.block_locs[blk].loc); + if (!block_locs[blk].is_fixed && (ap->row_num[blk] != DONT_SOLVE || (imacro(blk) != NO_MACRO && ap->row_num[macro_head(blk)] != DONT_SOLVE))) { + unbind_tile(block_locs[blk].loc); } } @@ -821,9 +823,9 @@ void CutSpreader::strict_legalize() { // length of the macro they are in (for single blocks, priority = 1). 
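// Illustrative sketch (not part of this patch): the `remaining` queue built below
// is a max-heap keyed on macro length, so the longest macros are legalized first.
#include <cstdio>
#include <queue>
#include <utility>

int main() {
    std::priority_queue<std::pair<int, int>> remaining; // (macro_size, blk_id)
    remaining.emplace(1, 7);  // a single block
    remaining.emplace(4, 3);  // head of a 4-member carry chain
    remaining.emplace(2, 5);  // head of a 2-member macro
    while (!remaining.empty()) {
        auto [size, blk] = remaining.top(); // pairs compare by first element
        remaining.pop();
        std::printf("place blk %d (macro size %d)\n", blk, size); // blks 3, 5, 7
    }
    return 0;
}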
// This prioritizes the placement of longest macros over single blocks std::priority_queue> remaining; - for (auto blk : ap->solve_blks) { + for (ClusterBlockId blk : ap->solve_blks) { if (imacro(blk) != NO_MACRO) // blk is head block of a macro (only head blks are solved) - remaining.emplace(place_ctx.pl_macros[imacro(blk)].members.size(), blk); + remaining.emplace(pl_macros[imacro(blk)].members.size(), blk); else remaining.emplace(1, blk); } @@ -961,13 +963,14 @@ void CutSpreader::strict_legalize() { * Place blk on sub_tile location by modifying place_ctx.grid_blocks, place_ctx.block_locs, and ap->blk_locs[blk].loc */ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) == EMPTY_BLOCK_ID); - VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); - place_ctx.grid_blocks.set_block_at_location(sub_tile, blk); - place_ctx.block_locs[blk].loc = sub_tile; - place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, - place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) + 1); + auto& grid_blocks = ap->blk_loc_registry_ref_.mutable_grid_blocks(); + auto& block_locs = ap->blk_loc_registry_ref_.mutable_block_locs(); + + VTR_ASSERT(grid_blocks.block_at_location(sub_tile) == ClusterBlockId::INVALID()); + VTR_ASSERT(block_locs[blk].is_fixed == false); + grid_blocks.set_block_at_location(sub_tile, blk); + block_locs[blk].loc = sub_tile; + grid_blocks.increment_usage({sub_tile.x, sub_tile.y, sub_tile.layer}); ap->blk_locs[blk].loc = sub_tile; } @@ -976,14 +979,15 @@ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { * Remove placement at sub_tile location by clearing place_ctx.block_locs and place_Ctx.grid_blocks */ void CutSpreader::unbind_tile(t_pl_loc sub_tile) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) != EMPTY_BLOCK_ID); - ClusterBlockId blk = place_ctx.grid_blocks.block_at_location(sub_tile); - VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); - place_ctx.block_locs[blk].loc = t_pl_loc{}; - place_ctx.grid_blocks.set_block_at_location(sub_tile, EMPTY_BLOCK_ID); - place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, - place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) - 1); + auto& grid_blocks = ap->blk_loc_registry_ref_.mutable_grid_blocks(); + auto& block_locs = ap->blk_loc_registry_ref_.mutable_block_locs(); + + VTR_ASSERT(grid_blocks.block_at_location(sub_tile) != ClusterBlockId::INVALID()); + ClusterBlockId blk = grid_blocks.block_at_location(sub_tile); + VTR_ASSERT(block_locs[blk].is_fixed == false); + block_locs[blk].loc = t_pl_loc{}; + grid_blocks.set_block_at_location(sub_tile, ClusterBlockId::INVALID()); + grid_blocks.decrement_usage({sub_tile.x, sub_tile.y, sub_tile.layer}); } /* @@ -992,10 +996,12 @@ void CutSpreader::unbind_tile(t_pl_loc sub_tile) { * the block in place_ctx.grid_blocks) */ bool CutSpreader::is_placed(ClusterBlockId blk) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - if (place_ctx.block_locs[blk].loc != t_pl_loc{}) { - auto loc = place_ctx.block_locs[blk].loc; - VTR_ASSERT(place_ctx.grid_blocks.block_at_location(loc) == blk); + const auto& grid_blocks = ap->blk_loc_registry_ref_.grid_blocks(); + const auto& block_locs = ap->blk_loc_registry_ref_.block_locs(); + + if (block_locs[blk].loc != t_pl_loc{}) { + auto loc = block_locs[blk].loc; + 
VTR_ASSERT(grid_blocks.block_at_location(loc) == blk); return true; } return false; @@ -1025,15 +1031,15 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, int& best_inp_len, t_pl_loc& best_subtile, std::priority_queue>& remaining) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& grid_blocks = ap->blk_loc_registry_ref_.grid_blocks(); const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - // iteration at current radius has exceed exploration limit, and a candidate sub_tile (best_subtile) is found + // iteration at current radius has exceeded exploration limit, and a candidate sub_tile (best_subtile) is found // then blk is placed in best_subtile if (exceeds_explore_limit && best_subtile != t_pl_loc{}) { // find the logic block bound to (placed on) best_subtile - ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(best_subtile); - if (bound_blk != EMPTY_BLOCK_ID) { // if best_subtile has a logic block + ClusterBlockId bound_blk = grid_blocks.block_at_location(best_subtile); + if (bound_blk) { // if best_subtile has a logic block unbind_tile(best_subtile); // clear bound_block and best_subtile's placement info remaining.emplace(1, bound_blk); // put bound_blk back into remaining blocks to place } @@ -1043,8 +1049,8 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, // if exploration limit is not met or a candidate sub_tile is not found yet for (auto sub_t : subtiles_at_location[nx][ny]) { // for each available sub_tile at random location - ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(sub_t); // logic blk at [nx, ny] - if (bound_blk == EMPTY_BLOCK_ID + ClusterBlockId bound_blk = grid_blocks.block_at_location(sub_t); // logic blk at [nx, ny] + if (bound_blk == ClusterBlockId::INVALID() || ripup_radius_met || rand() % (20000) < 10) { /* conditions when a sub_tile at nx, ny is considered: @@ -1054,7 +1060,7 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, * OR * 2) a 0.05% chance of acceptance. 
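// Illustrative sketch (not part of this patch): the `rand() % (20000) < 10` test
// used above accepts with probability 10/20000 = 0.05%, matching condition 2) in
// the comment. A standalone check of that rate:
#include <cstdio>
#include <cstdlib>

int main() {
    const int trials = 1000000;
    int hits = 0;
    for (int i = 0; i < trials; ++i) {
        if (rand() % 20000 < 10) ++hits;
    }
    // Expect roughly 0.0005 * 1000000 = ~500 hits.
    std::printf("acceptance rate: %.6f\n", static_cast<double>(hits) / trials);
    return 0;
}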
*/ - if (bound_blk != EMPTY_BLOCK_ID && imacro(bound_blk) != NO_MACRO) + if (bound_blk && imacro(bound_blk) != NO_MACRO) // do not use sub_tiles when the block placed on it is part of a macro, as macros have higher priority continue; if (!exceeds_explore_limit) { // if still in exploration phase, find best_subtile with smallest best_inp_len @@ -1077,7 +1083,7 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, } break; } else { // exploration phase passed and still no best_subtile yet, choose the next compatible sub_tile - if (bound_blk != EMPTY_BLOCK_ID) { + if (bound_blk) { remaining.emplace(1, bound_blk); unbind_tile(sub_t); // remove bound_blk and place blk on sub_t } @@ -1103,7 +1109,8 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, int nx, int ny, std::priority_queue<std::pair<int, ClusterBlockId>>& remaining) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& grid_blocks = ap->blk_loc_registry_ref_.grid_blocks(); const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; for (auto sub_t : subtiles_at_location[nx][ny]) { @@ -1127,8 +1134,8 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, // if the target location has a logic block, ensure it's not part of a macro // because a macro placed before the current one has higher priority (longer chain) - ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target); - if (bound != EMPTY_BLOCK_ID && imacro(bound) != NO_MACRO) { + ClusterBlockId bound = grid_blocks.block_at_location(target); + if (bound && imacro(bound) != NO_MACRO) { placement_impossible = true; break; } @@ -1136,7 +1143,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, targets.emplace_back(visit_blk, target); if (macro_head(visit_blk) == visit_blk) { // if visit_blk is the head block of the macro // push all macro members to visit queue along with their calculated positions - const std::vector<t_pl_macro_member>& members = place_ctx.pl_macros[imacro(blk)].members; + const std::vector<t_pl_macro_member>& members = pl_macros[imacro(blk)].members; for (auto member = members.begin() + 1; member != members.end(); ++member) { t_pl_loc mloc = target + member->offset; // calculate member_loc using (head blk location + offset) visit.emplace(member->blk_index, mloc); @@ -1146,8 +1153,8 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, if (!placement_impossible) { // if placement is possible, apply this placement for (auto& target : targets) { - ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target.second); - if (bound != EMPTY_BLOCK_ID) { + ClusterBlockId bound = grid_blocks.block_at_location(target.second); + if (bound) { // if target location has a logic block, displace it and put it in remaining queue to be placed later unbind_tile(target.second); remaining.emplace(1, bound); diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp index 7cfdab8e16c..b8a950d832a 100644 --- a/vpr/src/place/directed_moves_util.cpp +++ b/vpr/src/place/directed_moves_util.cpp @@ -2,21 +2,25 @@ #include "directed_moves_util.h" #include "centroid_move_generator.h" -void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc) { - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); +t_physical_tile_loc get_coordinate_of_pin(ClusterPinId pin, + const BlkLocRegistry& blk_loc_registry) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + const
auto& cluster_ctx = g_vpr_ctx.clustering(); - int pnum = tile_pin_index(pin); + int pnum = blk_loc_registry.tile_pin_index(pin); ClusterBlockId block = cluster_ctx.clb_nlist.pin_block(pin); - tile_loc.x = place_ctx.block_locs[block].loc.x + physical_tile_type(block)->pin_width_offset[pnum]; - tile_loc.y = place_ctx.block_locs[block].loc.y + physical_tile_type(block)->pin_height_offset[pnum]; - tile_loc.layer_num = place_ctx.block_locs[block].loc.layer; + t_physical_tile_loc tile_loc; + t_pl_loc block_loc = blk_loc_registry.block_locs()[block].loc; + tile_loc.x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + tile_loc.y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + tile_loc.layer_num = block_loc.layer; tile_loc.x = std::max(std::min(tile_loc.x, (int)grid.width() - 2), 1); //-2 for no perim channels tile_loc.y = std::max(std::min(tile_loc.y, (int)grid.height() - 2), 1); //-2 for no perim channels + + return tile_loc; } void calculate_centroid_loc(ClusterBlockId b_from, @@ -24,19 +28,18 @@ void calculate_centroid_loc(ClusterBlockId b_from, t_pl_loc& centroid, const PlacerCriticalities* criticalities, bool noc_attraction_enabled, - float noc_attraction_weight) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); + float noc_attraction_weight, + const BlkLocRegistry& blk_loc_registry) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = blk_loc_registry.block_locs(); - t_physical_tile_loc tile_loc; - int ipin; float acc_weight = 0; float acc_x = 0; float acc_y = 0; float acc_layer = 0; float weight = 1; - int from_block_layer_num = g_vpr_ctx.placement().block_locs[b_from].loc.layer; + int from_block_layer_num = block_locs[b_from].loc.layer; VTR_ASSERT(from_block_layer_num != OPEN); //iterate over the from block pins @@ -68,14 +71,14 @@ void calculate_centroid_loc(ClusterBlockId b_from, * This case rarely happens but causes QoR degradation */ if (pin_id == sink_pin_id) continue; - ipin = cluster_ctx.clb_nlist.pin_net_index(sink_pin_id); + int ipin = cluster_ctx.clb_nlist.pin_net_index(sink_pin_id); if (timing_weights) { weight = criticalities->criticality(net_id, ipin); } else { weight = 1; } - get_coordinate_of_pin(sink_pin_id, tile_loc); + t_physical_tile_loc tile_loc = get_coordinate_of_pin(sink_pin_id, blk_loc_registry); acc_x += tile_loc.x * weight; acc_y += tile_loc.y * weight; @@ -86,7 +89,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, //else the pin is sink --> only care about its driver else { - ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); + int ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); if (timing_weights) { weight = criticalities->criticality(net_id, ipin); } else { @@ -95,7 +98,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); - get_coordinate_of_pin(source_pin, tile_loc); + t_physical_tile_loc tile_loc = get_coordinate_of_pin(source_pin, blk_loc_registry); acc_x += tile_loc.x * weight; acc_y += tile_loc.y * weight; @@ -118,7 +121,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, acc_weight *= (1.0f - noc_attraction_weight); for (ClusterBlockId router_blk_id : noc_routers) { - t_block_loc router_loc = place_ctx.block_locs[router_blk_id]; + t_block_loc router_loc = block_locs[router_blk_id]; acc_x += router_loc.loc.x * single_noc_weight; acc_y += router_loc.loc.y * single_noc_weight; acc_weight += single_noc_weight; @@ -133,10 +136,10 @@ void 
calculate_centroid_loc(ClusterBlockId b_from, } static std::map available_reward_function = { - {"basic", BASIC}, - {"nonPenalizing_basic", NON_PENALIZING_BASIC}, - {"runtime_aware", RUNTIME_AWARE}, - {"WLbiased_runtime_aware", WL_BIASED_RUNTIME_AWARE}}; + {"basic", e_reward_function::BASIC}, + {"nonPenalizing_basic", e_reward_function::NON_PENALIZING_BASIC}, + {"runtime_aware", e_reward_function::RUNTIME_AWARE}, + {"WLbiased_runtime_aware", e_reward_function::WL_BIASED_RUNTIME_AWARE}}; e_reward_function string_to_reward(const std::string& st) { return available_reward_function[st]; diff --git a/vpr/src/place/directed_moves_util.h b/vpr/src/place/directed_moves_util.h index dc2f07c4643..cef889d5e4e 100644 --- a/vpr/src/place/directed_moves_util.h +++ b/vpr/src/place/directed_moves_util.h @@ -7,7 +7,7 @@ /** * @brief enum represents the different reward functions */ -enum e_reward_function { +enum class e_reward_function { BASIC, ///@ directly uses the change of the annealing cost function NON_PENALIZING_BASIC, ///@ same as basic reward function but with 0 reward if it's a hill-climbing one RUNTIME_AWARE, ///@ same as NON_PENALIZING_BASIC but with normalizing with the runtime factor of each move type @@ -17,7 +17,8 @@ enum e_reward_function { e_reward_function string_to_reward(const std::string& st); ///@brief Helper function that returns the x, y coordinates of a pin -void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc); +t_physical_tile_loc get_coordinate_of_pin(ClusterPinId pin, + const BlkLocRegistry& blk_loc_registry); /** * @brief Calculates the exact centroid location @@ -46,13 +47,15 @@ void calculate_centroid_loc(ClusterBlockId b_from, t_pl_loc& centroid, const PlacerCriticalities* criticalities, bool noc_attraction_enabled, - float noc_attraction_weight); + float noc_attraction_weight, + const BlkLocRegistry& blk_loc_registry); inline void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc& centroid, - const PlacerCriticalities* criticalities) { - calculate_centroid_loc(b_from, timing_weights, centroid, criticalities, false, 0.0f); + const PlacerCriticalities* criticalities, + const BlkLocRegistry& blk_loc_registry) { + calculate_centroid_loc(b_from, timing_weights, centroid, criticalities, false, 0.0f, blk_loc_registry); } #endif diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp index 995c2a37836..d321f0d2ebd 100644 --- a/vpr/src/place/feasible_region_move_generator.cpp +++ b/vpr/src/place/feasible_region_move_generator.cpp @@ -1,19 +1,37 @@ #include "feasible_region_move_generator.h" + #include "globals.h" -#include -#include "math.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" -e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +#include +#include + +FeasibleRegionMoveGenerator::FeasibleRegionMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& placer_state = placer_state_.get(); + auto& place_move_ctx = placer_state.mutable_move(); + const auto& 
block_locs = placer_state.block_locs(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); + ClusterNetId net_from; int pin_from; //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - true, + /*highly_crit_block=*/true, &net_from, - &pin_from); + &pin_from, + placer_state); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Feasible Region Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { //No movable block found @@ -21,12 +39,8 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& return e_create_move::ABORT; } - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - //from block data - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); @@ -35,8 +49,6 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc to; // Currently, we don't change the layer for this move to.layer = from.layer; - int ipin; - ClusterBlockId bnum; int max_x, min_x, max_y, min_y; place_move_ctx.X_coord.clear(); @@ -47,11 +59,11 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) continue; - ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); + int ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); if (criticalities->criticality(net_id, ipin) > placer_opts.place_crit_limit) { - bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - place_move_ctx.X_coord.push_back(place_ctx.block_locs[bnum].loc.x); - place_move_ctx.Y_coord.push_back(place_ctx.block_locs[bnum].loc.y); + ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); + place_move_ctx.X_coord.push_back(block_locs[bnum].loc.x); + place_move_ctx.Y_coord.push_back(block_locs[bnum].loc.y); } } if (!place_move_ctx.X_coord.empty()) { @@ -69,7 +81,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& //Get the most critical output of the node int xt, yt; ClusterBlockId b_output = cluster_ctx.clb_nlist.net_pin_block(net_from, pin_from); - t_pl_loc output_loc = place_ctx.block_locs[b_output].loc; + t_pl_loc output_loc = block_locs[b_output].loc; xt = output_loc.x; yt = output_loc.y; @@ -113,7 +125,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& placer_opts.place_dm_rlim}; // Try to find a legal location inside the feasible region - if (!find_to_loc_median(cluster_from_type, from, &FR_coords, to, b_from)) { + if (!find_to_loc_median(cluster_from_type, from, &FR_coords, to, b_from, blk_loc_registry)) { /** If there is no legal location in the feasible region, calculate the center of the FR and try to find a legal location * in a range around this center.
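// Illustrative sketch (not part of this patch): the feasible region gathered above
// is derived from the coordinates of critical drivers, and the fallback target is
// the region's center, as the code below computes. A simplified bounding-box
// version with plain ints (both input vectors must be non-empty):
#include <algorithm>
#include <vector>

struct Rect { int xmin, ymin, xmax, ymax; };

Rect critical_driver_bbox(const std::vector<int>& xs, const std::vector<int>& ys) {
    Rect r;
    r.xmin = *std::min_element(xs.begin(), xs.end());
    r.xmax = *std::max_element(xs.begin(), xs.end());
    r.ymin = *std::min_element(ys.begin(), ys.end());
    r.ymax = *std::max_element(ys.begin(), ys.end());
    return r; // fallback center: ((xmin + xmax) / 2, (ymin + ymax) / 2)
}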
*/ @@ -122,11 +134,11 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& center.y = (FR_coords.ymin + FR_coords.ymax) / 2; // TODO: Currently, we don't move blocks between different types of layers center.layer = from.layer; - if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from)) + if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from, blk_loc_registry)) return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -134,4 +146,4 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& } return create_move; -} +} \ No newline at end of file diff --git a/vpr/src/place/feasible_region_move_generator.h b/vpr/src/place/feasible_region_move_generator.h index 0f635c00a57..1d0e3a9224e 100644 --- a/vpr/src/place/feasible_region_move_generator.h +++ b/vpr/src/place/feasible_region_move_generator.h @@ -19,7 +19,16 @@ * */ class FeasibleRegionMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; + public: + FeasibleRegionMoveGenerator() = delete; + explicit FeasibleRegionMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp index 8532d063133..cd6c9c957ba 100644 --- a/vpr/src/place/initial_noc_placement.cpp +++ b/vpr/src/place/initial_noc_placement.cpp @@ -1,5 +1,7 @@ #include "initial_noc_placment.h" + +#include "vpr_types.h" #include "initial_placement.h" #include "noc_place_utils.h" #include "noc_place_checkpoint.h" @@ -31,8 +33,11 @@ static bool accept_noc_swap(double delta_cost, double prob); * @brief Places a constrained NoC router within its partition region. * * @param router_blk_id NoC router cluster block ID + * @param blk_loc_registry Placement block location information. To be + * filled with the location where pl_macro is placed. */ -static void place_constrained_noc_router(ClusterBlockId router_blk_id); +static void place_constrained_noc_router(ClusterBlockId router_blk_id, + BlkLocRegistry& blk_loc_registry); /** * @brief Randomly places unconstrained NoC routers. @@ -40,16 +45,22 @@ static void place_constrained_noc_router(ClusterBlockId router_blk_id); * @param unfixed_routers Contains the cluster block ID for all unconstrained * NoC routers. * @param seed Used for shuffling NoC routers. + * @param blk_loc_registry Placement block location information. To be filled + * with the location where pl_macro is placed. */ static void place_noc_routers_randomly(std::vector& unfixed_routers, - int seed); + int seed, + BlkLocRegistry& blk_loc_registry); /** * @brief Runs a simulated annealing optimizer for NoC routers. * * @param noc_opts Contains weighting factors for NoC cost terms. + * @param blk_loc_registry Placement block location information. 
+ * To be filled with the location where pl_macro is placed. */ -static void noc_routers_anneal(const t_noc_opts& noc_opts); +static void noc_routers_anneal(const t_noc_opts& noc_opts, + BlkLocRegistry& blk_loc_registry); static bool accept_noc_swap(double delta_cost, double prob) { if (delta_cost <= 0.0) { @@ -68,7 +79,8 @@ static bool accept_noc_swap(double delta_cost, double prob) { } } -static void place_constrained_noc_router(ClusterBlockId router_blk_id) { +static void place_constrained_noc_router(ClusterBlockId router_blk_id, + BlkLocRegistry& blk_loc_registry) { auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); @@ -84,11 +96,11 @@ static void place_constrained_noc_router(ClusterBlockId router_blk_id) { bool macro_placed = false; for (int i_try = 0; i_try < MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY && !macro_placed; i_try++) { - macro_placed = try_place_macro_randomly(pl_macro, pr, block_type, FREE); + macro_placed = try_place_macro_randomly(pl_macro, pr, block_type, e_pad_loc_type::FREE, blk_loc_registry); } if (!macro_placed) { - macro_placed = try_place_macro_exhaustively(pl_macro, pr, block_type, FREE); + macro_placed = try_place_macro_exhaustively(pl_macro, pr, block_type, e_pad_loc_type::FREE, blk_loc_registry); } if (!macro_placed) { @@ -96,11 +108,14 @@ static void place_constrained_noc_router(ClusterBlockId router_blk_id) { } } -static void place_noc_routers_randomly(std::vector& unfixed_routers, int seed) { - auto& place_ctx = g_vpr_ctx.placement(); - auto& noc_ctx = g_vpr_ctx.noc(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); +static void place_noc_routers_randomly(std::vector& unfixed_routers, + int seed, + BlkLocRegistry& blk_loc_registry) { + const auto& compressed_grids = g_vpr_ctx.placement().compressed_block_grids; + const auto& noc_ctx = g_vpr_ctx.noc(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); /* * Unconstrained NoC routers are placed randomly, then NoC cost is optimized using simulated annealing. 
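// Illustrative sketch (not part of this patch): the quick NoC anneal in the next
// hunk shrinks its range limit and acceptance probability linearly over the move
// budget. The constants below are assumptions for illustration only.
#include <cstdio>

int main() {
    const int N_MOVES = 10000;        // assumed move budget
    const float max_r_lim = 15.0f;    // assumed maximum range limit
    const double starting_prob = 0.5; // assumed initial acceptance probability
    const double prob_step = starting_prob / N_MOVES;
    for (int i_move = 0; i_move < N_MOVES; i_move += 2500) {
        // Same decay formulas as the annealer below:
        float r_lim_decayed = 1.0f + (N_MOVES - i_move) * (max_r_lim / N_MOVES);
        double prob = starting_prob - i_move * prob_step;
        std::printf("move %5d: r_lim=%.2f accept_prob=%.3f\n", i_move, r_lim_decayed, prob);
    }
    return 0;
}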
@@ -127,11 +142,11 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout const auto router_block_type = cluster_ctx.clb_nlist.block_type(noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist()[0]); // Get the compressed grid for NoC - const auto& compressed_noc_grid = place_ctx.compressed_block_grids[router_block_type->index]; + const auto& compressed_noc_grid = compressed_grids[router_block_type->index]; // Iterate over shuffled physical routers to place logical routers // Since physical routers are shuffled, router placement would be random - for (const auto& phy_router : noc_phy_routers) { + for (const NocRouter& phy_router : noc_phy_routers) { t_physical_tile_loc router_phy_loc = phy_router.get_router_physical_location(); // Find a compatible sub-tile @@ -141,7 +156,7 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout t_pl_loc loc(router_phy_loc, sub_tile); - if (place_ctx.grid_blocks.is_sub_tile_empty(router_phy_loc, sub_tile)) { + if (grid_blocks.is_sub_tile_empty(router_phy_loc, sub_tile)) { // Pick one of the unplaced routers auto logical_router_bid = unfixed_routers.back(); unfixed_routers.pop_back(); @@ -153,7 +168,7 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout t_pl_macro pl_macro; pl_macro.members.push_back(macro_member); - bool legal = try_place_macro(pl_macro, loc); + bool legal = try_place_macro(pl_macro, loc, blk_loc_registry); if (!legal) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Could not place a router cluster into an empty physical router."); } @@ -166,8 +181,10 @@ static void place_noc_routers_randomly(std::vector& unfixed_rout } // end for of random router placement } -static void noc_routers_anneal(const t_noc_opts& noc_opts) { +static void noc_routers_anneal(const t_noc_opts& noc_opts, + BlkLocRegistry& blk_loc_registry) { auto& noc_ctx = g_vpr_ctx.noc(); + const auto& block_locs = blk_loc_registry.block_locs(); // Only NoC related costs are considered t_placer_costs costs; @@ -219,17 +236,16 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { // Generate and evaluate router moves for (int i_move = 0; i_move < N_MOVES; i_move++) { - e_create_move create_move_outcome = e_create_move::ABORT; blocks_affected.clear_move_blocks(); // Shrink the range limit over time float r_lim_decayed = 1.0f + (N_MOVES - i_move) * (max_r_lim / N_MOVES); - create_move_outcome = propose_router_swap(blocks_affected, r_lim_decayed); + e_create_move create_move_outcome = propose_router_swap(blocks_affected, r_lim_decayed, blk_loc_registry); if (create_move_outcome != e_create_move::ABORT) { - apply_move_blocks(blocks_affected); + apply_move_blocks(blocks_affected, blk_loc_registry); NocCostTerms noc_delta_c; - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, block_locs); double delta_cost = calculate_noc_cost(noc_delta_c, costs.noc_cost_norm_factors, noc_opts); double prob = starting_prob - i_move * prob_step; @@ -237,28 +253,31 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { if (move_accepted) { costs.cost += delta_cost; - commit_move_blocks(blocks_affected); + commit_move_blocks(blocks_affected, blk_loc_registry.mutable_grid_blocks()); commit_noc_costs(); costs += noc_delta_c; // check if the current placement is better than the stored checkpoint if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) { - checkpoint.save_checkpoint(costs.cost); + checkpoint.save_checkpoint(costs.cost, 
block_locs); } } else { // The proposed move is rejected - revert_move_blocks(blocks_affected); - revert_noc_traffic_flow_routes(blocks_affected); + revert_move_blocks(blocks_affected, blk_loc_registry); + revert_noc_traffic_flow_routes(blocks_affected, block_locs); } } } if (checkpoint.get_cost() < costs.cost) { - checkpoint.restore_checkpoint(costs); + checkpoint.restore_checkpoint(costs, blk_loc_registry); } } -void initial_noc_placement(const t_noc_opts& noc_opts, const t_placer_opts& placer_opts) { +void initial_noc_placement(const t_noc_opts& noc_opts, + const t_placer_opts& placer_opts, + BlkLocRegistry& blk_loc_registry) { vtr::ScopedStartFinishTimer timer("Initial NoC Placement"); auto& noc_ctx = g_vpr_ctx.noc(); + const auto& block_locs = blk_loc_registry.block_locs(); // Get all the router clusters const std::vector<ClusterBlockId>& router_blk_ids = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist(); @@ -266,30 +285,30 @@ void initial_noc_placement(const t_noc_opts& noc_opts, const t_placer_opts& plac std::vector<ClusterBlockId> unfixed_routers; // Check for floorplanning constraints and place constrained NoC routers - for (auto router_blk_id : router_blk_ids) { + for (ClusterBlockId router_blk_id : router_blk_ids) { // The block is fixed and was placed in mark_fixed_blocks() - if (is_block_placed((router_blk_id))) { + if (is_block_placed(router_blk_id, block_locs)) { continue; } if (is_cluster_constrained(router_blk_id)) { - place_constrained_noc_router(router_blk_id); + place_constrained_noc_router(router_blk_id, blk_loc_registry); } else { unfixed_routers.push_back(router_blk_id); } } // Place unconstrained NoC routers randomly - place_noc_routers_randomly(unfixed_routers, placer_opts.seed); + place_noc_routers_randomly(unfixed_routers, placer_opts.seed, blk_loc_registry); // populate internal data structures to maintain route, bandwidth usage, and latencies - initial_noc_routing({}); + initial_noc_routing({}, block_locs); // Run the simulated annealing optimizer for NoC routers - noc_routers_anneal(noc_opts); + noc_routers_anneal(noc_opts, blk_loc_registry); // check if there are any cycles - bool has_cycle = noc_routing_has_cycle(); + bool has_cycle = noc_routing_has_cycle(block_locs); if (has_cycle) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "At least one cycle was found in NoC channel dependency graph. This may cause a deadlock " diff --git a/vpr/src/place/initial_noc_placment.h b/vpr/src/place/initial_noc_placment.h index 7727f15f6aa..3a37f95969c 100644 --- a/vpr/src/place/initial_noc_placment.h +++ b/vpr/src/place/initial_noc_placment.h @@ -1,15 +1,22 @@ -#ifndef VTR_INITIAL_NOC_PLACMENT_H -#define VTR_INITIAL_NOC_PLACMENT_H +#ifndef VTR_INITIAL_NOC_PLACEMENT_H +#define VTR_INITIAL_NOC_PLACEMENT_H -#include "vpr_types.h" +struct t_noc_opts; +struct t_placer_opts; +class BlkLocRegistry; /** * @brief Randomly places NoC routers, then runs a quick simulated annealing * to minimize NoC costs. * * @param noc_opts NoC-related options. Used to calculate NoC-related costs. + * @param placer_opts Contains the placement algorithm options including the seed. + * @param blk_loc_registry Placement block location information. To be filled * with the locations of the placed NoC routers.
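// Illustrative sketch (not part of this patch): the checkpointing above keeps the
// best placement seen during the anneal and restores it if the final cost is
// worse. A minimal stand-in using a vector of ints for the saved state:
#include <vector>

struct Checkpoint {
    bool valid = false;
    double cost = 0.0;
    std::vector<int> state; // stand-in for the saved block locations

    void save(double c, const std::vector<int>& s) {
        valid = true;
        cost = c;
        state = s;
    }
};

// Save only when the new cost beats the stored one (or nothing is stored yet).
void maybe_checkpoint(Checkpoint& cp, double cost, const std::vector<int>& state) {
    if (!cp.valid || cost < cp.cost) {
        cp.save(cost, state);
    }
}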
*/ -void initial_noc_placement(const t_noc_opts& noc_opts, const t_placer_opts& placer_opts); +void initial_noc_placement(const t_noc_opts& noc_opts, + const t_placer_opts& placer_opts, + BlkLocRegistry& blk_loc_registry); -#endif //VTR_INITIAL_NOC_PLACMENT_H +#endif //VTR_INITIAL_NOC_PLACEMENT_H diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index f7644a2a808..8636de52759 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -1,6 +1,7 @@ #include "vtr_memory.h" #include "vtr_random.h" #include "vtr_time.h" +#include "vpr_types.h" #include "globals.h" #include "read_place.h" @@ -12,9 +13,9 @@ #include "move_utils.h" #include "region.h" #include "directed_moves_util.h" -#include "vpr_types.h" #include "echo_files.h" + #include #include #include @@ -36,15 +37,20 @@ static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100; /** * @brief Set chosen grid locations to EMPTY block id before each placement iteration * * @param unplaced_blk_types_index Block types whose grid locations must be cleared. - * + * @param blk_loc_registry Placement block location information. The grid locations + * of the given block types are cleared. */ -static void clear_block_type_grid_locs(const std::unordered_set<int>& unplaced_blk_types_index); +static void clear_block_type_grid_locs(const std::unordered_set<int>& unplaced_blk_types_index, + BlkLocRegistry& blk_loc_registry); /** * @brief Initializes the grid to empty. It also initializes the location for * all blocks to unplaced. + * + * @param blk_loc_registry Placement block location information. All grid and block + * locations are cleared. */ -static void clear_all_grid_locs(); +static void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry); /** * @brief Control routine for placing a macro. @@ -61,14 +67,17 @@ static void clear_all_grid_locs(); * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. * @param blk_types_empty_locs_in_grid First location (lowest y) and number of remaining blocks in each column for the blk_id type. * @param block_scores The block_scores (ranking of what to place next) for unplaced blocks connected to this macro should be updated. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. * * @return true if macro was placed, false if not. */ static bool place_macro(int macros_max_num_tries, const t_pl_macro& pl_macro, - enum e_pad_loc_type pad_loc_type, + e_pad_loc_type pad_loc_type, std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, - vtr::vector<ClusterBlockId, t_block_score>& block_scores); + vtr::vector<ClusterBlockId, t_block_score>& block_scores, + BlkLocRegistry& blk_loc_registry); /* * Assign scores to each block based on macro size and floorplanning constraints. */ @@ -79,14 +88,14 @@ static vtr::vector<ClusterBlockId, t_block_score> assign_block_scores(); /** * @brief Tries to find y coordinate for macro head location based on macro direction - * * * @param first_macro_loc The first available location that can place the macro blocks. * @param pl_macro The macro to be placed.
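// Illustrative sketch (not part of this patch): this diff converts several enums
// to scoped form (e_reward_function earlier, and e_pad_loc_type values are now
// spelled e_pad_loc_type::FREE / e_pad_loc_type::RANDOM). Scoped enums do not
// leak enumerator names or convert implicitly to int:
enum class e_example_pad_loc { FREE, RANDOM };

bool should_fix_pads(e_example_pad_loc t) {
    // The enumerator must be qualified; a bare `RANDOM` no longer compiles.
    return t == e_example_pad_loc::RANDOM;
}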
* * @return y coordinate of the location that macro head should be placed */ -static int get_y_loc_based_on_macro_direction(t_grid_empty_locs_block_type first_macro_loc, const t_pl_macro& pl_macro); +static int get_y_loc_based_on_macro_direction(t_grid_empty_locs_block_type first_macro_loc, + const t_pl_macro& pl_macro); /** * @brief Tries to get the first available location of a specific block type that can accommodate macro blocks @@ -128,8 +139,13 @@ static std::vector<t_grid_empty_locs_block_type> init_blk_types_empty_locations( * @param pl_macro The macro to be fixed. * @param loc The location at which the head of the macro is placed. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. + * @param block_locs Clustered block locations used to mark the IO blocks that are to be placed + * randomly as fixed. */ -static inline void fix_IO_block_types(const t_pl_macro& pl_macro, t_pl_loc loc, enum e_pad_loc_type pad_loc_type); +static inline void fix_IO_block_types(const t_pl_macro& pl_macro, + t_pl_loc loc, + e_pad_loc_type pad_loc_type, + vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs); /** * @brief Determine whether a specific macro can be placed in a specific location. @@ -150,10 +166,14 @@ static bool is_loc_legal(const t_pl_loc& loc, * * @param pl_macro The macro to be placed. * @param centroid specified location (x,y,subtile) for the pl_macro head member. + * @param blk_loc_registry Placement block location information. Used to get the locations + * of the already-placed blocks connected to pl_macro. * * @return a vector of blocks that are connected to this block but not yet placed so their scores can later be updated. */ -static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro, t_pl_loc& centroid); +static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro, + t_pl_loc& centroid, + const BlkLocRegistry& blk_loc_registry); /** * @brief Tries to find a nearest location to the centroid location if calculated centroid location is not legal or is occupied. @@ -161,10 +181,15 @@ static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro, * @param centroid_loc Calculated location in try_centroid_placement function for the block. * @param block_type Logical block type of the macro blocks. * @param search_for_empty If set, the function tries to find an empty location. + * @param blk_loc_registry Placement block location information. Used to check the occupancy + * of candidate locations. * * @return true if the function can find any location near the centroid one, false otherwise. */ -static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ptr block_type, bool search_for_empty); +static bool find_centroid_neighbor(t_pl_loc& centroid_loc, + t_logical_block_type_ptr block_type, + bool search_for_empty, + const BlkLocRegistry& blk_loc_registry); /** * @brief Tries to place a macro at a centroid location of its placed connections. @@ -174,15 +199,18 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ * constrained. * @param block_type Logical block type of the macro blocks. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. - * @param block_scores The block_scores (ranking of what to place next) for unplaced blocks connected to this macro are updated in this routine. + * @param block_scores The block_scores (ranking of what to place next) for unplaced blocks connected to this macro are updated in this routine. + * @param blk_loc_registry Placement block location information.
To be filled with the location + * where pl_macro is placed. * * @return true if the macro gets placed, false if not. */ static bool try_centroid_placement(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type, - vtr::vector& block_scores); + e_pad_loc_type pad_loc_type, + vtr::vector& block_scores, + BlkLocRegistry& blk_loc_registry); /** * @brief Looks for a valid placement location for macro in second iteration, tries to place as many macros as possible in one column @@ -194,47 +222,53 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, * @param block_type Logical block type of the macro blocks. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. * @param blk_types_empty_locs_in_grid first location (lowest y) and number of remaining blocks in each column for the blk_id type + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. * * @return true if the macro gets placed, false if not. */ static bool try_dense_placement(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type, - std::vector* blk_types_empty_locs_in_grid); + e_pad_loc_type pad_loc_type, + std::vector* blk_types_empty_locs_in_grid, + BlkLocRegistry& blk_loc_registry); /** * @brief Tries for MAX_INIT_PLACE_ATTEMPTS times to place all blocks considering their floorplanning constraints and the device size * * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. * @param constraints_file Used to read block locations if any constraints is available. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. */ static void place_all_blocks(const t_placer_opts& placer_opts, vtr::vector& block_scores, - enum e_pad_loc_type pad_loc_type, - const char* constraints_file); + e_pad_loc_type pad_loc_type, + const char* constraints_file, + BlkLocRegistry& blk_loc_registry); /** * @brief If any blocks remain unplaced after all initial placement iterations, this routine * throws an error indicating that initial placement can not be done with the current device size or * floorplanning constraints. 
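// Illustrative sketch (not part of this patch): place_macro() later in this diff
// tries a cascade of strategies (dense, centroid, a bounded number of random
// tries, then exhaustive) and stops at the first success. Reduced to its skeleton:
#include <functional>
#include <vector>

bool place_with_cascade(const std::vector<std::function<bool()>>& strategies) {
    for (const auto& try_place : strategies) {
        if (try_place()) {
            return true; // first successful strategy wins
        }
    }
    return false; // caller raises an initial-placement error
}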
*/ -static void check_initial_placement_legality(); +static void check_initial_placement_legality(const vtr::vector_map& block_locs); /** * @brief Fills movable_blocks in global PlacementContext */ -static void alloc_and_load_movable_blocks(); +static void alloc_and_load_movable_blocks(const vtr::vector_map& block_locs); -static void check_initial_placement_legality() { +static void check_initial_placement_legality(const vtr::vector_map& block_locs) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); int unplaced_blocks = 0; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - if (place_ctx.block_locs[blk_id].loc.x == INVALID_X) { + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + if (block_locs[blk_id].loc.x == INVALID_X) { VTR_LOG("Block %s (# %d) of type %s could not be placed during initial placement iteration %d\n", cluster_ctx.clb_nlist.block_name(blk_id).c_str(), blk_id, @@ -252,7 +286,7 @@ static void check_initial_placement_legality() { } for (auto movable_blk_id : place_ctx.movable_blocks) { - if (place_ctx.block_locs[movable_blk_id].is_fixed) { + if (block_locs[movable_blk_id].is_fixed) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Fixed block was mistakenly marked as movable during initial placement.\n"); } } @@ -260,7 +294,7 @@ static void check_initial_placement_legality() { for (const auto& logical_block_type : device_ctx.logical_block_types) { const auto& movable_blocks_of_type = place_ctx.movable_blocks_per_type[logical_block_type.index]; for (const auto& movable_blk_id : movable_blocks_of_type) { - if (place_ctx.block_locs[movable_blk_id].is_fixed) { + if (block_locs[movable_blk_id].is_fixed) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Fixed block %d of logical type %s was mistakenly marked as movable during initial placement.\n", (size_t)movable_blk_id, logical_block_type.name); } @@ -274,10 +308,9 @@ static void check_initial_placement_legality() { } } -bool is_block_placed(ClusterBlockId blk_id) { - auto& place_ctx = g_vpr_ctx.placement(); - - return (place_ctx.block_locs[blk_id].loc.x != INVALID_X); +bool is_block_placed(ClusterBlockId blk_id, + const vtr::vector_map& block_locs) { + return (block_locs[blk_id].loc.x != INVALID_X); } static bool is_loc_legal(const t_pl_loc& loc, @@ -313,7 +346,10 @@ static bool is_loc_legal(const t_pl_loc& loc, return legal; } -static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ptr block_type, bool search_for_empty) { +static bool find_centroid_neighbor(t_pl_loc& centroid_loc, + t_logical_block_type_ptr block_type, + bool search_for_empty, + const BlkLocRegistry& blk_loc_registry) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); const int centroid_loc_layer_num = centroid_loc.layer; @@ -345,9 +381,10 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ {cx_from, cy_from, layer_from}, search_range, to_compressed_loc, - false, + /*is_median=*/false, centroid_loc_layer_num, - search_for_empty); + search_for_empty, + blk_loc_registry); if (!legal) { return false; @@ -358,20 +395,21 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ return legal; } -static std::vector find_centroid_loc(const t_pl_macro& pl_macro, t_pl_loc& centroid) { - auto& cluster_ctx = g_vpr_ctx.clustering(); +static std::vector find_centroid_loc(const t_pl_macro& pl_macro, + t_pl_loc& 
centroid, + const BlkLocRegistry& blk_loc_registry) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = blk_loc_registry.block_locs(); - t_physical_tile_loc tile_loc; float acc_weight = 0; float acc_x = 0; float acc_y = 0; - int head_layer_num = OPEN; bool find_layer = false; std::vector layer_count(g_vpr_ctx.device().grid.get_num_layers(), 0); ClusterBlockId head_blk = pl_macro.members.at(0).blk_index; // For now, we put the macro in the same layer as the head block - head_layer_num = g_vpr_ctx.placement().block_locs[head_blk].loc.layer; + int head_layer_num = block_locs[head_blk].loc.layer; // If block is placed, we use the layer of the block. Otherwise, the layer will be determined later if (head_layer_num == OPEN) { find_layer = true; @@ -399,18 +437,18 @@ static std::vector find_centroid_loc(const t_pl_macro& pl_macro, if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { continue; } - for (auto sink_pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId sink_pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { /* Ignore if one of the sinks is the block itself*/ if (pin_id == sink_pin_id) continue; - if (!is_block_placed(cluster_ctx.clb_nlist.pin_block(sink_pin_id))) { + if (!is_block_placed(cluster_ctx.clb_nlist.pin_block(sink_pin_id), block_locs)) { //add unplaced block to connected_blocks_to_update vector to update its score later. connected_blocks_to_update.push_back(cluster_ctx.clb_nlist.pin_block(sink_pin_id)); continue; } - get_coordinate_of_pin(sink_pin_id, tile_loc); + t_physical_tile_loc tile_loc = get_coordinate_of_pin(sink_pin_id, blk_loc_registry); if (find_layer) { VTR_ASSERT(tile_loc.layer_num != OPEN); layer_count[tile_loc.layer_num]++; @@ -424,13 +462,13 @@ static std::vector find_centroid_loc(const t_pl_macro& pl_macro, //else the pin is sink --> only care about its driver else { ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); - if (!is_block_placed(cluster_ctx.clb_nlist.pin_block(source_pin))) { + if (!is_block_placed(cluster_ctx.clb_nlist.pin_block(source_pin), block_locs)) { //add unplaced block to connected_blocks_to_update vector to update its score later. 
connected_blocks_to_update.push_back(cluster_ctx.clb_nlist.pin_block(source_pin)); continue; } - get_coordinate_of_pin(source_pin, tile_loc); + t_physical_tile_loc tile_loc = get_coordinate_of_pin(source_pin, blk_loc_registry); if (find_layer) { VTR_ASSERT(tile_loc.layer_num != OPEN); layer_count[tile_loc.layer_num]++; @@ -460,12 +498,15 @@ static std::vector find_centroid_loc(const t_pl_macro& pl_macro, static bool try_centroid_placement(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type, - vtr::vector& block_scores) { + e_pad_loc_type pad_loc_type, + vtr::vector& block_scores, + BlkLocRegistry& blk_loc_registry) { + auto& block_locs = blk_loc_registry.mutable_block_locs(); + t_pl_loc centroid_loc(OPEN, OPEN, OPEN, OPEN); std::vector unplaced_blocks_to_update_their_score; - unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc); + unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry); //no suggestion was available for this block type if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer})) { @@ -476,7 +517,7 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, //try to find a near location that meet these requirements bool neighbor_legal_loc = false; if (!is_loc_legal(centroid_loc, pr, block_type)) { - neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false); + neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false, blk_loc_registry); if (!neighbor_legal_loc) { //no neighbor candidate found return false; } @@ -502,15 +543,13 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); - bool legal; - - legal = try_place_macro(pl_macro, centroid_loc); + bool legal = try_place_macro(pl_macro, centroid_loc, blk_loc_registry); if (legal) { - fix_IO_block_types(pl_macro, centroid_loc, pad_loc_type); + fix_IO_block_types(pl_macro, centroid_loc, pad_loc_type, block_locs); //after placing the current block, its connections' score must be updated. - for (auto blk_id : unplaced_blocks_to_update_their_score) { + for (ClusterBlockId blk_id : unplaced_blocks_to_update_their_score) { block_scores[blk_id].number_of_placed_connections++; } } @@ -520,10 +559,8 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, static int get_y_loc_based_on_macro_direction(t_grid_empty_locs_block_type first_macro_loc, const t_pl_macro& pl_macro) { int y = first_macro_loc.first_avail_loc.y; - /* - * if the macro member offset is positive, it means that macro head should be placed at the first location of first_macro_loc. + /* if the macro member offset is positive, it means that macro head should be placed at the first location of first_macro_loc. * otherwise, macro head should be placed at the last available location to ensure macro_can_be_placed can check macro location correctly. 
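// Illustrative sketch (not part of this patch): the accumulation in
// find_centroid_loc() above is a weighted average of connected pin locations.
// The same arithmetic, reduced to floats (weights must not sum to zero):
#include <vector>

struct WeightedPoint { float x, y, weight; };

WeightedPoint weighted_centroid(const std::vector<WeightedPoint>& pins) {
    float acc_x = 0.0f, acc_y = 0.0f, acc_w = 0.0f;
    for (const WeightedPoint& p : pins) {
        acc_x += p.x * p.weight;
        acc_y += p.y * p.weight;
        acc_w += p.weight;
    }
    return {acc_x / acc_w, acc_y / acc_w, acc_w};
}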
- * */ if (pl_macro.members.size() > 1) { if (pl_macro.members.at(1).offset.y < 0) { @@ -605,15 +642,18 @@ static std::vector init_blk_types_empty_locations( return block_type_empty_locs; } -static inline void fix_IO_block_types(const t_pl_macro& pl_macro, t_pl_loc loc, enum e_pad_loc_type pad_loc_type) { +static inline void fix_IO_block_types(const t_pl_macro& pl_macro, + t_pl_loc loc, + e_pad_loc_type pad_loc_type, + vtr::vector_map& block_locs) { const auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); + //If the user marked the IO block pad_loc_type as RANDOM, that means it should be randomly //placed and then stay fixed to that location, which is why the macro members are marked as fixed. const auto& type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); - if (is_io_type(type) && pad_loc_type == RANDOM) { - for (const auto& pl_macro_member : pl_macro.members) { - place_ctx.block_locs[pl_macro_member.blk_index].is_fixed = true; + if (is_io_type(type) && pad_loc_type == e_pad_loc_type::RANDOM) { + for (const t_pl_macro_member& pl_macro_member : pl_macro.members) { + block_locs[pl_macro_member.blk_index].is_fixed = true; } } } @@ -621,9 +661,9 @@ static inline void fix_IO_block_types(const t_pl_macro& pl_macro, t_pl_loc loc, bool try_place_macro_randomly(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type) { + e_pad_loc_type pad_loc_type, + BlkLocRegistry& blk_loc_registry) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; - t_pl_loc loc; /* * Getting various values needed for the find_compatible_compressed_loc_in_range() routine called below. @@ -667,14 +707,18 @@ bool try_place_macro_randomly(const t_pl_macro& pl_macro, min_compressed_loc.y, max_compressed_loc.y, selected_layer, selected_layer}, to_compressed_loc, - false, + /*is_median=*/false, selected_layer, - false); + /*search_for_empty=*/false, + blk_loc_registry); + + if (!legal) { //No valid position found return false; } + t_pl_loc loc; compressed_grid_to_loc(block_type, to_compressed_loc, loc); auto& device_ctx = g_vpr_ctx.device(); @@ -684,10 +728,11 @@ bool try_place_macro_randomly(const t_pl_macro& pl_macro, VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); - legal = try_place_macro(pl_macro, loc); + legal = try_place_macro(pl_macro, loc, blk_loc_registry); if (legal) { - fix_IO_block_types(pl_macro, loc, pad_loc_type); + auto& block_locs = blk_loc_registry.mutable_block_locs(); + fix_IO_block_types(pl_macro, loc, pad_loc_type, block_locs); } return legal; @@ -696,9 +741,11 @@ bool try_place_macro_randomly(const t_pl_macro& pl_macro, bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type) { + e_pad_loc_type pad_loc_type, + BlkLocRegistry& blk_loc_registry) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; - auto& place_ctx = g_vpr_ctx.mutable_placement(); + auto& block_locs = blk_loc_registry.mutable_block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); const std::vector& regions = pr.get_regions(); @@ -744,11 +791,11 @@ bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, int subtile = regions[reg].get_sub_tile(); to_loc.sub_tile = subtile; - if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { - placed = 
try_place_macro(pl_macro, to_loc); + if (grid_blocks.block_at_location(to_loc) == ClusterBlockId::INVALID()) { + placed = try_place_macro(pl_macro, to_loc, blk_loc_registry); if (placed) { - fix_IO_block_types(pl_macro, to_loc, pad_loc_type); + fix_IO_block_types(pl_macro, to_loc, pad_loc_type, block_locs); } } } else { @@ -759,10 +806,10 @@ bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, for (int st = st_low; st <= st_high && !placed; st++) { to_loc.sub_tile = st; - if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { - placed = try_place_macro(pl_macro, to_loc); + if (grid_blocks.block_at_location(to_loc) == ClusterBlockId::INVALID()) { + placed = try_place_macro(pl_macro, to_loc, blk_loc_registry); if (placed) { - fix_IO_block_types(pl_macro, to_loc, pad_loc_type); + fix_IO_block_types(pl_macro, to_loc, pad_loc_type, block_locs); } } } @@ -784,8 +831,9 @@ bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, static bool try_dense_placement(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type, - std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid) { + e_pad_loc_type pad_loc_type, + std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, + BlkLocRegistry& blk_loc_registry) { t_pl_loc loc; int column_index = get_blk_type_first_loc(loc, pl_macro, blk_types_empty_locs_in_grid); @@ -801,11 +849,11 @@ static bool try_dense_placement(const t_pl_macro& pl_macro, VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); - bool legal = false; - legal = try_place_macro(pl_macro, loc); + bool legal = try_place_macro(pl_macro, loc, blk_loc_registry); if (legal) { - fix_IO_block_types(pl_macro, loc, pad_loc_type); + auto& block_locs = blk_loc_registry.mutable_block_locs(); + fix_IO_block_types(pl_macro, loc, pad_loc_type, block_locs); } //Dense placement found a legal position for pl_macro; @@ -814,10 +862,13 @@ static bool try_dense_placement(const t_pl_macro& pl_macro, return legal; } -bool try_place_macro(const t_pl_macro& pl_macro, t_pl_loc head_pos) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); +bool try_place_macro(const t_pl_macro& pl_macro, + t_pl_loc head_pos, + BlkLocRegistry& blk_loc_registry) { + bool f_placer_debug = g_vpr_ctx.placement().f_placer_debug; + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); - VTR_LOGV_DEBUG(place_ctx.f_placer_debug, "\t\t\t\tTry to place the macro at %dx%dx%dx%d\n", + VTR_LOGV_DEBUG(f_placer_debug, "\t\t\t\tTry to place the macro at %dx%dx%dx%d\n", head_pos.x, head_pos.y, head_pos.sub_tile, @@ -826,38 +877,37 @@ bool try_place_macro(const t_pl_macro& pl_macro, t_pl_loc head_pos) { bool macro_placed = false; // If that location is occupied, do nothing.
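The mechanical change repeated through these hunks is dependency injection: each placement helper now receives the BlkLocRegistry it reads and mutates instead of reaching into g_vpr_ctx.mutable_placement(). A minimal sketch of the before/after shape, with stand-in types (Registry and Loc here are simplifications, not the real VTR classes):

#include <cassert>
#include <unordered_map>

struct Loc { int x, y; };
struct Registry { std::unordered_map<int, Loc> block_locs; }; // stands in for BlkLocRegistry

// Before: hidden side effect on a global singleton.
static Registry g_registry;
bool place_block_global(int blk, Loc loc) {
    g_registry.block_locs[blk] = loc;
    return true;
}

// After: the caller chooses which placement state is touched,
// making the helper unit-testable and easier to reason about.
bool place_block(int blk, Loc loc, Registry& registry) {
    registry.block_locs[blk] = loc;
    return true;
}

int main() {
    place_block_global(7, {1, 2});           // old style: mutates g_registry
    Registry scratch;                        // new style: an explicit, local registry
    assert(place_block(7, {3, 4}, scratch));
    assert(scratch.block_locs.at(7).x == 3);
}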
- if (place_ctx.grid_blocks.block_at_location(head_pos) != EMPTY_BLOCK_ID) { - return (macro_placed); + if (grid_blocks.block_at_location(head_pos)) { + return macro_placed; } - bool mac_can_be_placed = macro_can_be_placed(pl_macro, head_pos, false); + bool mac_can_be_placed = macro_can_be_placed(pl_macro, head_pos, /*check_all_legality=*/false, blk_loc_registry); if (mac_can_be_placed) { // Place down the macro macro_placed = true; - VTR_LOGV_DEBUG(place_ctx.f_placer_debug, "\t\t\t\tMacro is placed at the given location\n"); - for (const auto& pl_macro_member : pl_macro.members) { + VTR_LOGV_DEBUG(f_placer_debug, "\t\t\t\tMacro is placed at the given location\n"); + for (const t_pl_macro_member& pl_macro_member : pl_macro.members) { t_pl_loc member_pos = head_pos + pl_macro_member.offset; - ClusterBlockId iblk = pl_macro_member.blk_index; - - set_block_location(iblk, member_pos); - + blk_loc_registry.set_block_location(iblk, member_pos); } // Finish placing all the members in the macro } - return (macro_placed); + return macro_placed; } static bool place_macro(int macros_max_num_tries, const t_pl_macro& pl_macro, enum e_pad_loc_type pad_loc_type, std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, - vtr::vector<ClusterBlockId, t_block_score>& block_scores) { + vtr::vector<ClusterBlockId, t_block_score>& block_scores, + BlkLocRegistry& blk_loc_registry) { + const auto& block_locs = blk_loc_registry.block_locs(); ClusterBlockId blk_id = pl_macro.members[0].blk_index; VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tHead of the macro is Block %d\n", size_t(blk_id)); - if (is_block_placed(blk_id)) { + if (is_block_placed(blk_id, block_locs)) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tBlock is already placed\n", size_t(blk_id)); return true; } @@ -881,18 +931,18 @@ static bool place_macro(int macros_max_num_tries, //We need to place densely in second iteration to be able to find a legal initial placement solution if (blk_types_empty_locs_in_grid != nullptr && !blk_types_empty_locs_in_grid->empty()) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tTry dense placement\n"); - macro_placed = try_dense_placement(pl_macro, pr, block_type, pad_loc_type, blk_types_empty_locs_in_grid); + macro_placed = try_dense_placement(pl_macro, pr, block_type, pad_loc_type, blk_types_empty_locs_in_grid, blk_loc_registry); } if (!macro_placed) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tTry centroid placement\n"); - macro_placed = try_centroid_placement(pl_macro, pr, block_type, pad_loc_type, block_scores); + macro_placed = try_centroid_placement(pl_macro, pr, block_type, pad_loc_type, block_scores, blk_loc_registry); } VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tMacro is placed: %d\n", macro_placed); // If macro is not placed yet, try to place the macro randomly for the max number of random tries for (int itry = 0; itry < macros_max_num_tries && !macro_placed; itry++) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tTry random place iter: %d\n", itry); - macro_placed = try_place_macro_randomly(pl_macro, pr, block_type, pad_loc_type); + macro_placed = try_place_macro_randomly(pl_macro, pr, block_type, pad_loc_type, blk_loc_registry); } // Finished all tries if (!macro_placed) { @@ -904,7 +954,7 @@ static bool place_macro(int macros_max_num_tries, // Exhaustive placement of carry macros VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\t\tTry exhaustive placement\n"); - macro_placed = try_place_macro_exhaustively(pl_macro, pr, block_type, pad_loc_type); + macro_placed = 
try_place_macro_exhaustively(pl_macro, pr, block_type, pad_loc_type, blk_loc_registry); } return macro_placed; } @@ -955,10 +1005,11 @@ static vtr::vector<ClusterBlockId, t_block_score> assign_block_scores() { } -static void place_all_blocks([[maybe_unused]] const t_placer_opts& placer_opts, +static void place_all_blocks(const t_placer_opts& placer_opts, vtr::vector<ClusterBlockId, t_block_score>& block_scores, enum e_pad_loc_type pad_loc_type, - const char* constraints_file) { + const char* constraints_file, + BlkLocRegistry& blk_loc_registry) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); @@ -981,12 +1032,12 @@ static void place_all_blocks([[maybe_unused]] const t_placer_opts& placer_opts, for (auto iter_no = 0; iter_no < MAX_INIT_PLACE_ATTEMPTS; iter_no++) { //clear grid for a new placement iteration - clear_block_type_grid_locs(unplaced_blk_type_in_curr_itr); + clear_block_type_grid_locs(unplaced_blk_type_in_curr_itr, blk_loc_registry); unplaced_blk_type_in_curr_itr.clear(); // read the constraint file if the user has provided one and this is not the first attempt if (strlen(constraints_file) != 0 && iter_no != 0) { - read_constraints(constraints_file); + read_constraints(constraints_file, blk_loc_registry); } //resize the vector to store unplaced block types empty locations @@ -1011,12 +1062,14 @@ #ifdef VTR_ENABLE_DEBUG_LOGGING enable_placer_debug(placer_opts, blk_id); +#else + (void)placer_opts; #endif VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Popped Block %d\n", size_t(blk_id)); blocks_placed_since_heap_update++; - bool block_placed = place_one_block(blk_id, pad_loc_type, &blk_types_empty_locs_in_grid[blk_id_type->index], &block_scores); + bool block_placed = place_one_block(blk_id, pad_loc_type, &blk_types_empty_locs_in_grid[blk_id_type->index], &block_scores, blk_loc_registry); //update heap based on update_heap_freq calculated above if (blocks_placed_since_heap_update % (update_heap_freq) == 0) { @@ -1044,7 +1097,7 @@ } //loop over block types with macro that have failed to be placed, and add their locations in grid for the next iteration - for (auto itype : unplaced_blk_type_in_curr_itr) { + for (int itype : unplaced_blk_type_in_curr_itr) { blk_types_empty_locs_in_grid[itype] = init_blk_types_empty_locations(itype); } @@ -1053,8 +1106,13 @@ } } -static void clear_block_type_grid_locs(const std::unordered_set<int>& unplaced_blk_types_index) { +static void clear_block_type_grid_locs(const std::unordered_set<int>& unplaced_blk_types_index, + BlkLocRegistry& blk_loc_registry) { auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& grid_blocks = blk_loc_registry.mutable_grid_blocks(); + auto& block_locs = blk_loc_registry.mutable_block_locs(); + bool clear_all_block_types = false; /* check if all types should be cleared @@ -1065,24 +1123,18 @@ static void clear_block_type_grid_locs(const std::unordered_set<int>& unplaced_b clear_all_block_types = true; } - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); - int itype; - /* We'll use the grid to record where everything goes. Initialize to the grid has no * blocks placed anywhere.
*/ for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { for (int i = 0; i < (int)device_ctx.grid.width(); i++) { for (int j = 0; j < (int)device_ctx.grid.height(); j++) { - const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); - itype = type->index; + const t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int itype = type->index; if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { - place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); + grid_blocks.set_usage({i, j, layer_num}, 0); for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { - if (place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) != INVALID_BLOCK_ID) { - place_ctx.grid_blocks.set_block_at_location({i, j, k, layer_num}, EMPTY_BLOCK_ID); - } + grid_blocks.set_block_at_location({i, j, k, layer_num}, ClusterBlockId::INVALID()); } } } @@ -1090,15 +1142,15 @@ } /* Similarly, mark all blocks as not being placed yet. */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto blk_type = cluster_ctx.clb_nlist.block_type(blk_id)->index; + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + int blk_type = cluster_ctx.clb_nlist.block_type(blk_id)->index; if (clear_all_block_types || unplaced_blk_types_index.count(blk_type)) { - place_ctx.block_locs[blk_id].loc = t_pl_loc(); + block_locs[blk_id].loc = t_pl_loc(); } } } -static void clear_all_grid_locs() { +static void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry) { auto& device_ctx = g_vpr_ctx.device(); std::unordered_set<int> blk_types_to_be_cleared; @@ -1106,53 +1158,53 @@ // Insert all the logical block types into the set except the empty type // clear_block_type_grid_locs does not expect empty type to be among given types - for (const auto& logical_type : logical_block_types) { + for (const t_logical_block_type& logical_type : logical_block_types) { if (!is_empty_type(&logical_type)) { blk_types_to_be_cleared.insert(logical_type.index); } } - clear_block_type_grid_locs(blk_types_to_be_cleared); + clear_block_type_grid_locs(blk_types_to_be_cleared, blk_loc_registry); } -bool place_one_block(const ClusterBlockId& blk_id, +bool place_one_block(const ClusterBlockId blk_id, enum e_pad_loc_type pad_loc_type, std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, - vtr::vector<ClusterBlockId, t_block_score>* block_scores) { - auto& place_ctx = g_vpr_ctx.placement(); + vtr::vector<ClusterBlockId, t_block_score>* block_scores, + BlkLocRegistry& blk_loc_registry) { + const std::vector<t_pl_macro>& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); //Check if block has already been placed - if (is_block_placed(blk_id)) { + if (is_block_placed(blk_id, block_locs)) { return true; } bool placed_macro = false; //Lookup to see if the block is part of a macro - t_pl_macro pl_macro; int imacro; - get_imacro_from_iblk(&imacro, blk_id, place_ctx.pl_macros); + get_imacro_from_iblk(&imacro, blk_id, pl_macros); if (imacro != -1) { //If the block belongs to a macro, pass that macro to the placement routines VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tBelongs to a macro %d\n", imacro); - pl_macro = place_ctx.pl_macros[imacro]; - placed_macro = place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type, blk_types_empty_locs_in_grid, (*block_scores)); + const t_pl_macro& pl_macro = pl_macros[imacro]; + placed_macro = 
place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type, blk_types_empty_locs_in_grid, *block_scores, blk_loc_registry); } else { //If it does not belong to a macro, create a macro with the one block and then pass to the placement routines //This is done so that the initial placement flow can be the same whether the block belongs to a macro or not t_pl_macro_member macro_member; - t_pl_offset block_offset(0, 0, 0, 0); macro_member.blk_index = blk_id; - macro_member.offset = block_offset; + macro_member.offset = t_pl_offset(0, 0, 0, 0); + t_pl_macro pl_macro; pl_macro.members.push_back(macro_member); - placed_macro = place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type, blk_types_empty_locs_in_grid, (*block_scores)); + placed_macro = place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type, blk_types_empty_locs_in_grid, *block_scores, blk_loc_registry); } return placed_macro; } -static void alloc_and_load_movable_blocks() { +static void alloc_and_load_movable_blocks(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs) { auto& place_ctx = g_vpr_ctx.mutable_placement(); const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); @@ -1165,8 +1217,8 @@ static void alloc_and_load_movable_blocks() { // iterate over all clustered blocks and store block ids of movable ones - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - const auto& loc = place_ctx.block_locs[blk_id]; + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + const auto& loc = block_locs[blk_id]; if (!loc.is_fixed) { place_ctx.movable_blocks.push_back(blk_id); @@ -1178,13 +1230,15 @@ void initial_placement(const t_placer_opts& placer_opts, const char* constraints_file, - const t_noc_opts& noc_opts) { + const t_noc_opts& noc_opts, + BlkLocRegistry& blk_loc_registry) { vtr::ScopedStartFinishTimer timer("Initial Placement"); + auto& block_locs = blk_loc_registry.mutable_block_locs(); /* Initialize the grid blocks to empty. * Initialize all the blocks to unplaced.
*/ - clear_all_grid_locs(); + clear_all_grid_locs(blk_loc_registry); /* Go through cluster blocks to calculate the tightest placement * floorplan constraint for each constrained block @@ -1192,9 +1246,8 @@ void initial_placement(const t_placer_opts& placer_opts, propagate_place_constraints(); /*Mark the blocks that have already been locked to one spot via floorplan constraints - * as fixed, so they do not get moved during initial placement or later during the simulated annealing stage of placement - */ - mark_fixed_blocks(); + * as fixed, so they do not get moved during initial placement or later during the simulated annealing stage of placement*/ + mark_fixed_blocks(blk_loc_registry); // Compute and store compressed floorplanning constraints alloc_and_load_compressed_cluster_constraints(); @@ -1202,16 +1255,18 @@ // read the constraint file and place fixed blocks if (strlen(constraints_file) != 0) { - read_constraints(constraints_file); + read_constraints(constraints_file, blk_loc_registry); } + + if (!placer_opts.read_initial_place_file.empty()) { const auto& grid = g_vpr_ctx.device().grid; - read_place(nullptr, placer_opts.read_initial_place_file.c_str(), false, grid); + read_place(nullptr, placer_opts.read_initial_place_file.c_str(), blk_loc_registry, false, grid); } else { if (noc_opts.noc) { // NoC routers are placed before other blocks - initial_noc_placement(noc_opts, placer_opts); + initial_noc_placement(noc_opts, placer_opts, blk_loc_registry); propagate_place_constraints(); } @@ -1219,13 +1274,13 @@ vtr::vector<ClusterBlockId, t_block_score> block_scores = assign_block_scores(); //Place all blocks - place_all_blocks(placer_opts, block_scores, placer_opts.pad_loc_type, constraints_file); + place_all_blocks(placer_opts, block_scores, placer_opts.pad_loc_type, constraints_file, blk_loc_registry); } - alloc_and_load_movable_blocks(); + alloc_and_load_movable_blocks(block_locs); // ensure all blocks are placed and that NoC routing has no cycles - check_initial_placement_legality(); + check_initial_placement_legality(block_locs); //#ifdef VERBOSE // VTR_LOG("At end of initial_placement.\n"); diff --git a/vpr/src/place/initial_placement.h b/vpr/src/place/initial_placement.h index 44a3772087d..bb1d413bd64 100644 --- a/vpr/src/place/initial_placement.h +++ b/vpr/src/place/initial_placement.h @@ -1,10 +1,13 @@ #ifndef VPR_INITIAL_PLACEMENT_H #define VPR_INITIAL_PLACEMENT_H -#include "vpr_types.h" + #include "place_macro.h" #include "partition_region.h" +#include "vpr_types.h" +#include "vtr_vector_map.h" + /* The maximum number of tries when trying to place a macro at a * * random location before trying exhaustive placement - find the first * * legal position and place it during initial placement. */ @@ -30,7 +33,7 @@ struct t_block_score { /** * @brief keeps track of available empty locations of a specific block type during initial placement. - * Used to densly place macros that failed to be placed in the first initial placement iteration (random placement) + * Used to densely place macros that failed to be placed in the first initial placement iteration (random placement) */ struct t_grid_empty_locs_block_type { /* @@ -49,17 +52,21 @@ struct t_grid_empty_locs_block_type { * @brief tries to place a macro at a random location * * @param pl_macro The macro to be placed.
- * @param pr The PartitionRegion of the macro - represents its floorplanning constraints, is the size of the whole chip if the macro is not - * constrained. + * @param pr The PartitionRegion of the macro - represents its floorplanning constraints, + * is the size of the whole chip if the macro is not constrained. * @param block_type Logical block type of the macro blocks. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. * * @return true if the macro gets placed, false if not. */ bool try_place_macro_randomly(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type); + e_pad_loc_type pad_loc_type, + BlkLocRegistry& blk_loc_registry); + /** * @brief Looks for a valid placement location for macro exhaustively once the maximum number of random locations have been tried. @@ -69,13 +76,16 @@ bool try_place_macro_randomly(const t_pl_macro& pl_macro, * constrained. * @param block_type Logical block type of the macro blocks. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. * * @return true if the macro gets placed, false if not. */ bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, const PartitionRegion& pr, t_logical_block_type_ptr block_type, - enum e_pad_loc_type pad_loc_type); + e_pad_loc_type pad_loc_type, + BlkLocRegistry& blk_loc_registry); /** * @brief Places the macro if the head position passed in is legal, and all the resulting @@ -83,19 +93,26 @@ bool try_place_macro_exhaustively(const t_pl_macro& pl_macro, * * @param pl_macro The macro to be placed. * @param head_pos The location of the macro head member. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. * * @return true if macro was placed, false if not. */ -bool try_place_macro(const t_pl_macro& pl_macro, t_pl_loc head_pos); +bool try_place_macro(const t_pl_macro& pl_macro, + t_pl_loc head_pos, + BlkLocRegistry& blk_loc_registry); /** * @brief Checks whether the block is already placed * * @param blk_id block id of the block to be checked + * @param block_locs Clustered block locations, read to check whether the block + * has already been assigned a location. * * @return true if the block was placed, false if not. */ -bool is_block_placed(ClusterBlockId blk_id); +bool is_block_placed(ClusterBlockId blk_id, + const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs); /** * @brief Tries to find an initial placement location for each block considering floorplanning constraints @@ -108,12 +125,15 @@ bool is_block_placed(ClusterBlockId blk_id); * @param placer_opts Required by the function that set the status of f_placer_debug. * Also used to access pad_loc_type to see if a block needs to be marked fixed. * @param constraints_file Used to read block locations if any constraints is available. - * @param noc_enabled Used to check whether the user turned on the noc - * optimization during placement. + * @param noc_opts Contains information about if the NoC optimization is enabled + * and NoC-related weighting factors. + * @param blk_loc_registry Placement block location information. To be filled with the locations + * where blocks are placed.
*/ void initial_placement(const t_placer_opts& placer_opts, const char* constraints_file, - const t_noc_opts& noc_opts); + const t_noc_opts& noc_opts, + BlkLocRegistry& blk_loc_registry); /** * @brief Looks for a valid placement location for block. @@ -121,9 +141,15 @@ void initial_placement(const t_placer_opts& placer_opts, * @param blk_id The block that should be placed. * @param pad_loc_type Used to check whether an io block needs to be marked as fixed. * @param blk_types_empty_locs_in_grid First location (lowest y) and number of remaining blocks in each column for the blk_id type - * + * @param block_scores Scores assigned to different blocks to determine which one should be placed first. + * @param blk_loc_registry Placement block location information. To be filled with the location + * where the block is placed. * * @return true if the block gets placed, false if not. */ -bool place_one_block(const ClusterBlockId& blk_id, enum e_pad_loc_type pad_loc_type, std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, vtr::vector<ClusterBlockId, t_block_score>* block_scores); +bool place_one_block(const ClusterBlockId blk_id, + e_pad_loc_type pad_loc_type, + std::vector<t_grid_empty_locs_block_type>* blk_types_empty_locs_in_grid, + vtr::vector<ClusterBlockId, t_block_score>* block_scores, + BlkLocRegistry& blk_loc_registry); #endif diff --git a/vpr/src/place/manual_move_generator.cpp b/vpr/src/place/manual_move_generator.cpp index 6e2cf43d5cc..36d198acd09 100644 --- a/vpr/src/place/manual_move_generator.cpp +++ b/vpr/src/place/manual_move_generator.cpp @@ -2,18 +2,36 @@ * @file manual_move_generator.cpp * @author Paula Perdomo * @date 2021-07-19 - * @brief Contains the ManualMoveGenerator class memeber definitions. The ManualMoveGenerator class inherits from the MoveGenerator class. The class contains a propose_move function that checks if the block requested to move by the user exists and determines whether the manual move is VALID/ABORTED by the placer. If the manual move is determined VALID, the move is created. A manual move is ABORTED if the block requested is not found or movable and if there aren't any compatible subtiles. + * @brief Contains the ManualMoveGenerator class member definitions. + * The ManualMoveGenerator class inherits from the MoveGenerator class. + * The class contains a propose_move function that checks if the block requested + * to move by the user exists and determines whether the manual move is VALID/ABORTED + * by the placer. If the manual move is determined VALID, the move is created. + * A manual move is ABORTED if the block requested is not found or movable and if there aren't any compatible subtiles.
*/ #include "manual_move_generator.h" #include "manual_moves.h" +#include "placer_state.h" #ifndef NO_GRAPHICS # include "draw.h" #endif //NO_GRAPHICS +ManualMoveGenerator::ManualMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + //Manual Move Generator function -e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /*proposed_action*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { +e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& /*proposed_action*/, + float /*rlim*/, + const t_placer_opts& /*placer_opts*/, + const PlacerCriticalities* /*criticalities*/) { + auto& place_ctx = g_vpr_ctx.placement(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& device_ctx = g_vpr_ctx.device(); + auto& block_locs = placer_state_.get().block_locs(); + int block_id = -1; t_pl_loc to; @@ -30,12 +48,8 @@ e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ return e_create_move::ABORT; //No movable block was found } - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); - //Gets the current location of the block to move. - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); @@ -51,6 +65,7 @@ e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, place_ctx.blk_loc_registry()); return create_move; } + diff --git a/vpr/src/place/manual_move_generator.h b/vpr/src/place/manual_move_generator.h index 2995006e908..684b110ba3b 100644 --- a/vpr/src/place/manual_move_generator.h +++ b/vpr/src/place/manual_move_generator.h @@ -26,8 +26,15 @@ */ class ManualMoveGenerator : public MoveGenerator { public: + ManualMoveGenerator() = delete; + explicit ManualMoveGenerator(PlacerState& placer_state); + //Evaluates if move is successful and legal or unable to do. 
- e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /*proposed_action*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& /*proposed_action*/, + float /*rlim*/, + const t_placer_opts& /*placer_opts*/, + const PlacerCriticalities* /*criticalities*/) override; }; #endif /*VPR_MANUAL_MOVE_GEN_H */ diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp index 20ef207221d..3f116fe71c8 100644 --- a/vpr/src/place/median_move_generator.cpp +++ b/vpr/src/place/median_move_generator.cpp @@ -1,28 +1,35 @@ #include "median_move_generator.h" + #include "globals.h" -#include <algorithm> #include "place_constraints.h" -#include "placer_globals.h" +#include "placer_state.h" #include "move_utils.h" -static bool get_bb_incrementally(ClusterNetId net_id, - t_bb& bb_coord_new, - int xold, - int yold, - int layer_old, - int xnew, - int ynew, - int layer_new); +#include <algorithm> -static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net); +MedianMoveGenerator::MedianMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* /*criticalities*/) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + auto& placer_state = placer_state_.get(); + auto& place_move_ctx = placer_state.mutable_move(); + const auto& block_locs = placer_state.block_locs(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); -e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - false, - nullptr, - nullptr); + /*highly_crit_block=*/false, + /*net_from=*/nullptr, + /*pin_from=*/nullptr, + placer_state); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Median Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { //No movable block found @@ -30,15 +37,10 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ return e_create_move::ABORT; } - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - const int num_layers = device_ctx.grid.get_num_layers(); - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; int from_layer = from.layer; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from_layer}); @@ -49,8 +51,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN); t_bb limit_coords; - ClusterBlockId bnum; - int pnum, xnew, xold, ynew, yold, layer_new, layer_old; //clear the vectors that saves X & Y coords //reused to save allocation time @@ -76,59 +76,58 @@ e_create_move 
MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ if (cluster_ctx.clb_nlist.net_sinks(net_id).size() < SMALL_NET) { //calculate the bb from scratch get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net); - if (skip_net) + if (skip_net) { continue; + } } else { t_bb union_bb; - const bool& cube_bb = g_vpr_ctx.placement().cube_bb; + const bool cube_bb = g_vpr_ctx.placement().cube_bb; if (!cube_bb) { union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]); } const auto& net_bb_coords = cube_bb ? place_move_ctx.bb_coords[net_id] : union_bb; //use the incremental update of the bb - bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); + ClusterBlockId bnum = cluster_ctx.clb_nlist.pin_block(pin_id); + int pnum = blk_loc_registry.tile_pin_index(pin_id); VTR_ASSERT(pnum >= 0); - xold = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - yold = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; - layer_old = place_ctx.block_locs[bnum].loc.layer; + t_pl_loc block_loc = block_locs[bnum].loc; + t_physical_tile_type_ptr block_physical_type = physical_tile_type(block_loc); + int xold = block_loc.x + block_physical_type->pin_width_offset[pnum]; + int yold = block_loc.y + block_physical_type->pin_height_offset[pnum]; + int layer_old = block_loc.layer; xold = std::max(std::min(xold, (int)device_ctx.grid.width() - 2), 1); //-2 for no perim channels yold = std::max(std::min(yold, (int)device_ctx.grid.height() - 2), 1); //-2 for no perim channels layer_old = std::max(std::min(layer_old, (int)device_ctx.grid.get_num_layers() - 1), 0); - //To calulate the bb incrementally while excluding the moving block + //To calculate the bb incrementally while excluding the moving block //assume that the moving block is moved to a non-critical coord of the bb + int xnew; if (net_bb_coords.xmin == xold) { xnew = net_bb_coords.xmax; } else { xnew = net_bb_coords.xmin; } + int ynew; if (net_bb_coords.ymin == yold) { ynew = net_bb_coords.ymax; } else { ynew = net_bb_coords.ymin; } + int layer_new; if (net_bb_coords.layer_min == layer_old) { layer_new = net_bb_coords.layer_max; } else { layer_new = net_bb_coords.layer_min; } - // If the mvoing block is on the border of the bounding box, we cannot get - // the bounding box incrementatlly. In that case, bounding box should be calculated + // If the moving block is on the border of the bounding box, we cannot get + // the bounding box incrementally. In that case, bounding box should be calculated // from scratch. 
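The exclusion trick used here is worth spelling out: to drop the moving pin from a net's bounding box, pretend it moved to the opposite, non-critical edge, then run the ordinary incremental update. A 1-D sketch under simplified assumptions (the real code applies this per axis — x, y, and layer — and falls back to a from-scratch rebuild when the stored edge counts cannot justify shrinking):

#include <cassert>

struct Interval { int min, max; };

// Pick a replacement coordinate for the moving pin: the edge it does NOT
// define. An interior or opposite-edge coordinate can never extend the bb.
int non_critical_coord(const Interval& bb, int old_coord) {
    return (old_coord == bb.min) ? bb.max : bb.min;
}

int main() {
    Interval bb{2, 9};
    assert(non_critical_coord(bb, 2) == 9); // pin sat on xmin -> pretend it is at xmax
    assert(non_critical_coord(bb, 9) == 2); // pin sat on xmax -> pretend it is at xmin
    assert(non_critical_coord(bb, 5) == 2); // interior pin -> either edge is safe
}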
- if (!get_bb_incrementally(net_id, - coords, - xold, - yold, - layer_old, - xnew, - ynew, - layer_new)) { + if (!get_bb_incrementally(net_id, coords, xold, yold, layer_old, xnew, ynew, layer_new)) { get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net); if (skip_net) continue; @@ -173,11 +172,11 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; median_point.layer = (limit_coords.layer_min + limit_coords.layer_max) / 2; - if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) { + if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -187,18 +186,13 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ return create_move; } -/* Finds the bounding box of a net and stores its coordinates in the * - * bb_coord_new data structure. It excludes the moving block sent in * - * function arguments in block_id. It also returns whether this net * - * should be excluded from median calculation or not. * - * This routine should only be called for small nets, since it does * - * not determine enough information for the bounding box to be * - * updated incrementally later. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. 
*/ -static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net) { +void MedianMoveGenerator::get_bb_from_scratch_excluding_block(ClusterNetId net_id, + t_bb& bb_coord_new, + ClusterBlockId block_id, + bool& skip_net) { //TODO: account for multiple physical pin instances per logical pin + const auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); skip_net = true; @@ -213,7 +207,6 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co int pnum; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); @@ -221,10 +214,11 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co if (bnum != block_id) { skip_net = false; - pnum = net_pin_to_tile_pin_index(net_id, 0); - int src_x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - int src_y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; - int src_layer = place_ctx.block_locs[bnum].loc.layer; + pnum = placer_state.blk_loc_registry().net_pin_to_tile_pin_index(net_id, 0); + const t_pl_loc& block_loc = block_locs[bnum].loc; + int src_x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int src_y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + int src_layer = block_loc.layer; xmin = src_x; ymin = src_y; @@ -235,15 +229,15 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co first_block = true; } - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); + pnum = placer_state.blk_loc_registry().tile_pin_index(pin_id); if (bnum == block_id) continue; skip_net = false; - const auto& block_loc = place_ctx.block_locs[bnum].loc; - int x = block_loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - int y = block_loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + const auto& block_loc = block_locs[bnum].loc; + int x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; int layer = block_loc.layer; if (!first_block) { @@ -290,31 +284,18 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co bb_coord_new.layer_max = std::max(std::min(layer_max, device_ctx.grid.get_num_layers() - 1), 0); } -/* - * Calculates the bounding box of a net by storing its coordinates * - * in the bb_coord_new data structure. It uses information from * - * PlaceMoveContext to calculate the bb incrementally. This routine * - * should only be called for large nets, since it has some overhead * - * relative to just doing a brute force bounding box calculation. * - * The bounding box coordinate and edge information for inet must be * - * valid before this routine is called. * - * Currently assumes channels on both sides of the CLBs forming the * - * edges of the bounding box can be used. Essentially, I am assuming * - * the pins always lie on the outside of the bounding box. * - * The x and y coordinates are the pin's x and y coordinates. */ -/* IO blocks are considered to be one cell in for simplicity. 
*/ -static bool get_bb_incrementally(ClusterNetId net_id, - t_bb& bb_coord_new, - int xold, - int yold, - int layer_old, - int xnew, - int ynew, - int layer_new) { +bool MedianMoveGenerator::get_bb_incrementally(ClusterNetId net_id, + t_bb& bb_coord_new, + int xold, + int yold, + int layer_old, + int xnew, + int ynew, + int layer_new) { //TODO: account for multiple physical pin instances per logical pin auto& device_ctx = g_vpr_ctx.device(); - auto& place_move_ctx = g_placer_ctx.move(); + auto& place_move_ctx = placer_state_.get().move(); xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1); //-2 for no perim channels ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels @@ -326,7 +307,7 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb union_bb_edge; t_bb union_bb; - const bool& cube_bb = g_vpr_ctx.placement().cube_bb; + const bool cube_bb = g_vpr_ctx.placement().cube_bb; /* Calculating per-layer bounding box is more time consuming compared to cube bounding box. To speed up * this move, the bounding box used for this move is of the type cube bounding box even if the per-layer * bounding box is used by placement SA engine. diff --git a/vpr/src/place/median_move_generator.h b/vpr/src/place/median_move_generator.h index ccecdf86a0e..be932f78d1f 100644 --- a/vpr/src/place/median_move_generator.h +++ b/vpr/src/place/median_move_generator.h @@ -16,7 +16,48 @@ * around it */ class MedianMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) override; + public: + MedianMoveGenerator() = delete; + explicit MedianMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* /*criticalities*/) override; + + /** + * @brief Calculates the bounding box of a net by storing its coordinates + * in the bb_coord_new data structure. + * + * @details It uses information from PlaceMoveContext to calculate the bb incrementally. + * This routine should only be called for large nets, since it has some overhead + * relative to just doing a brute force bounding box calculation. The bounding box coordinate + * and edge information for inet must be valid before this routine is called. + * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box + * can be used. Essentially, I am assuming the pins always lie on the outside of the bounding box. + * The x and y coordinates are the pin's x and y coordinates. IO blocks are considered to be + * one cell in for simplicity. */ + bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, + int xold, int yold, int layer_old, + int xnew, int ynew, int layer_new); + + + /** + * @brief Finds the bounding box of a net and stores its coordinates in the bb_coord_new data structure. + * + * @details It excludes the moving block sent in function arguments in block_id. + * It also returns whether this net should be excluded from median calculation or not. + * This routine should only be called for small nets, since it does not determine + * enough information for the bounding box to be updated incrementally later. + * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used.
+ * Essentially, I am assuming the pins always lie on the outside of the bounding box. + */ + void get_bb_from_scratch_excluding_block(ClusterNetId net_id, + t_bb& bb_coord_new, + ClusterBlockId block_id, + bool& skip_net); }; #endif diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 34d6d01cfa9..15315e4b01a 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -1,13 +1,15 @@ #ifndef VPR_MOVE_GENERATOR_H #define VPR_MOVE_GENERATOR_H + #include "vpr_types.h" #include "move_utils.h" #include "timing_place.h" #include "directed_moves_util.h" -#include "placer_globals.h" #include <limits> +class PlacerState; + struct MoveOutcomeStats { float delta_cost_norm = std::numeric_limits<float>::quiet_NaN(); float delta_bb_cost_norm = std::numeric_limits<float>::quiet_NaN(); @@ -42,6 +44,10 @@ struct MoveTypeStat { */ class MoveGenerator { public: + MoveGenerator(PlacerState& placer_state) + : placer_state_(placer_state) {} + + MoveGenerator() = delete; virtual ~MoveGenerator() = default; /** @@ -59,7 +65,11 @@ class MoveGenerator { * @param placer_opts: all the placer options * @param criticalities: the placer criticalities, useful for timing directed moves */ - virtual e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) = 0; + virtual e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) = 0; /** * @brief Receives feedback about the outcome of the previously proposed move * * @param reward: the value of the agent's reward * @param reward_fun: the name of the reward function used */ virtual void process_outcome(double /*reward*/, e_reward_function /*reward_fun*/) {} + + protected: + std::reference_wrapper<PlacerState> placer_state_; }; #endif diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index db06d34e608..0065c70662d 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -2,7 +2,7 @@ #include "move_utils.h" #include "globals.h" -#include "place_util.h" +#include "grid_block.h" #include "vtr_assert.h" t_pl_blocks_to_be_moved::t_pl_blocks_to_be_moved(size_t max_blocks){ @@ -16,16 +16,16 @@ size_t t_pl_blocks_to_be_moved::get_size_and_increment() { } //Records that block 'blk' should be moved to the specified 'to' location -e_block_move_result t_pl_blocks_to_be_moved::record_block_move(ClusterBlockId blk, t_pl_loc to) { +e_block_move_result t_pl_blocks_to_be_moved::record_block_move(ClusterBlockId blk, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry) { auto [to_it, to_success] = moved_to.emplace(to); if (!to_success) { log_move_abort("duplicate block move to location"); return e_block_move_result::ABORT; } - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - t_pl_loc from = place_ctx.block_locs[blk].loc; + t_pl_loc from = blk_loc_registry.block_locs()[blk].loc; auto [_, from_success] = moved_from.emplace(from); if (!from_success) { @@ -34,7 +34,7 @@ e_block_move_result t_pl_blocks_to_be_moved::record_block_move(ClusterBlockId bl return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); + VTR_ASSERT_SAFE(to.sub_tile < int(blk_loc_registry.grid_blocks().num_blocks_at_location({to.x, to.y, to.layer}))); // Sets up the blocks moved size_t 
imoved_blk = get_size_and_increment(); @@ -46,7 +46,7 @@ e_block_move_result t_pl_blocks_to_be_moved::record_block_move(ClusterBlockId bl } //Examines the currently proposed move and determine any empty locations -std::set<t_pl_loc> t_pl_blocks_to_be_moved::t_pl_blocks_to_be_moved::determine_locations_emptied_by_move() { +std::set<t_pl_loc> t_pl_blocks_to_be_moved::determine_locations_emptied_by_move() { std::set<t_pl_loc> moved_from_set; std::set<t_pl_loc> moved_to_set; @@ -67,8 +67,8 @@ std::set<t_pl_loc> t_pl_blocks_to_be_moved::t_pl_blocks_to_be_moved::determine_l } //Moves the blocks in blocks_affected to their new locations -void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); +void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + BlkLocRegistry& blk_loc_registry) { auto& device_ctx = g_vpr_ctx.device(); //Swap the blocks, but don't swap the nets or update place_ctx.grid_blocks @@ -80,7 +80,7 @@ void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { const t_pl_loc& new_loc = moved_block.new_loc; // move the block to its new location - place_ctx.block_locs[blk].loc = new_loc; + blk_loc_registry.mutable_block_locs()[blk].loc = new_loc; // get physical tile type of the old location t_physical_tile_type_ptr old_type = device_ctx.grid.get_physical_type({old_loc.x,old_loc.y,old_loc.layer}); @@ -89,15 +89,15 @@ //if physical tile type of old location does not equal physical tile type of new location, sync the new physical pins if (old_type != new_type) { - place_sync_external_block_connections(blk); + blk_loc_registry.place_sync_external_block_connections(blk); } } } //Commits the blocks in blocks_affected to their new locations (updates inverse //lookups via place_ctx.grid_blocks) -void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); +void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + GridBlock& grid_blocks) { /* Swap physical location */ for (const t_pl_moved_block& moved_block : blocks_affected.moved_blocks) { @@ -107,26 +107,24 @@ const t_pl_loc& from = moved_block.old_loc; //Remove from old location only if it hasn't already been updated by a previous block update - if (place_ctx.grid_blocks.block_at_location(from) == blk) { - place_ctx.grid_blocks.set_block_at_location(from, EMPTY_BLOCK_ID); - place_ctx.grid_blocks.set_usage({from.x, from.y, from.layer}, - place_ctx.grid_blocks.get_usage({from.x, from.y, from.layer}) - 1); + if (grid_blocks.block_at_location(from) == blk) { + grid_blocks.set_block_at_location(from, ClusterBlockId::INVALID()); + grid_blocks.decrement_usage({from.x, from.y, from.layer}); } //Add to new location - if (place_ctx.grid_blocks.block_at_location(to) == EMPTY_BLOCK_ID) { //Only need to increase usage if previously unused - place_ctx.grid_blocks.set_usage({to.x, to.y, to.layer}, - place_ctx.grid_blocks.get_usage({to.x, to.y, to.layer}) + 1); + if (grid_blocks.block_at_location(to) == ClusterBlockId::INVALID()) { + grid_blocks.increment_usage({to.x, to.y, to.layer}); } - place_ctx.grid_blocks.set_block_at_location(to, blk); + grid_blocks.set_block_at_location(to, blk); } // Finish updating clb for all blocks } //Moves the blocks in blocks_affected to their old locations -void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& place_ctx = 
g_vpr_ctx.mutable_placement(); +void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + BlkLocRegistry& blk_loc_registry) { auto& device_ctx = g_vpr_ctx.device(); // Swap the blocks back, nets not yet swapped they don't need to be changed @@ -137,7 +135,7 @@ const t_pl_loc& new_loc = moved_block.new_loc; // return the block to where it was before the swap - place_ctx.block_locs[blk].loc = old_loc; + blk_loc_registry.mutable_block_locs()[blk].loc = old_loc; // get physical tile type of the old location t_physical_tile_type_ptr old_type = device_ctx.grid.get_physical_type({old_loc.x,old_loc.y,old_loc.layer}); @@ -146,10 +144,11 @@ //if physical tile type of old location does not equal physical tile type of new location, sync the new physical pins if (old_type != new_type) { - place_sync_external_block_connections(blk); + blk_loc_registry.place_sync_external_block_connections(blk); } - VTR_ASSERT_SAFE_MSG(place_ctx.grid_blocks.block_at_location(old_loc) == blk, "Grid blocks should only have been updated if swap committed (not reverted)"); + VTR_ASSERT_SAFE_MSG(blk_loc_registry.grid_blocks().block_at_location(old_loc) == blk, + "Grid blocks should only have been updated if swap committed (not reverted)"); } } diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h index 65e4c89774b..453e248cdea 100644 --- a/vpr/src/place/move_transactions.h +++ b/vpr/src/place/move_transactions.h @@ -1,8 +1,12 @@ #ifndef VPR_MOVE_TRANSACTIONS_H #define VPR_MOVE_TRANSACTIONS_H + #include "vpr_types.h" #include "clustered_netlist_utils.h" +class BlkLocRegistry; +class GridBlock; + enum class e_block_move_result { VALID, //Move successful ABORT, //Unable to perform move @@ -30,7 +34,7 @@ struct t_pl_moved_block { * placement. * * Store the information on the blocks to be moved in a swap during * * placement, in the form of array of structs instead of struct with * - * arrays for cache effifiency * + * arrays for cache efficiency * * * moved blocks: a list of moved blocks data structure with * * information on the move. * @@ -55,7 +59,10 @@ struct t_pl_blocks_to_be_moved { */ void clear_move_blocks(); - e_block_move_result record_block_move(ClusterBlockId blk, t_pl_loc to); + + e_block_move_result record_block_move(ClusterBlockId blk, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry); std::set<t_pl_loc> determine_locations_emptied_by_move(); @@ -67,10 +74,13 @@ }; -void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); +void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + BlkLocRegistry& blk_loc_registry); -void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); +void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + GridBlock& grid_blocks); -void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected); +void revert_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected, + BlkLocRegistry& blk_loc_registry); #endif diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 02e20f24fd0..c40bfce2c02 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -10,7 +10,7 @@ #include "draw.h" #include "place_constraints.h" -#include "placer_globals.h" +#include "placer_state.h" //f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached.
When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. //Note: The flag is only effective if compiled with VTR_ENABLE_DEBUG_LOGGING @@ -39,22 +39,26 @@ void report_aborted_moves() { } } -e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { - e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to); +e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry) { + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); + e_block_move_result outcome = find_affected_blocks(blocks_affected, b_from, to, blk_loc_registry); if (outcome == e_block_move_result::INVERT) { //Try inverting the swap direction - auto& place_ctx = g_vpr_ctx.placement(); - ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); + ClusterBlockId b_to = grid_blocks.block_at_location(to); if (!b_to) { log_move_abort("inverted move no to block"); outcome = e_block_move_result::ABORT; } else { - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; - outcome = find_affected_blocks(blocks_affected, b_to, from); + outcome = find_affected_blocks(blocks_affected, b_to, from, blk_loc_registry); if (outcome == e_block_move_result::INVERT) { log_move_abort("inverted move recursion"); @@ -63,8 +67,7 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock } } - if (outcome == e_block_move_result::VALID - || outcome == e_block_move_result::INVERT_VALID) { + if (outcome == e_block_move_result::VALID || outcome == e_block_move_result::INVERT_VALID) { return e_create_move::VALID; } else { VTR_ASSERT_SAFE(outcome == e_block_move_result::ABORT); @@ -72,19 +75,22 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock } } -e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { +e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry) { /* Finds and set ups the affected_blocks array. * Returns abort_swap. 
*/ VTR_ASSERT_SAFE(b_from); + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + int imacro_from; e_block_move_result outcome = e_block_move_result::VALID; - auto& place_ctx = g_vpr_ctx.placement(); - - t_pl_loc from = place_ctx.block_locs[b_from].loc; - - auto& pl_macros = place_ctx.pl_macros; + t_pl_loc from = block_locs[b_from].loc; get_imacro_from_iblk(&imacro_from, b_from, pl_macros); if (imacro_from != -1) { @@ -94,12 +100,12 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte t_pl_offset swap_offset = to - from; int imember_from = 0; - outcome = record_macro_swaps(blocks_affected, imacro_from, imember_from, swap_offset); + outcome = record_macro_swaps(blocks_affected, imacro_from, imember_from, swap_offset, blk_loc_registry); VTR_ASSERT_SAFE(outcome != e_block_move_result::VALID || imember_from == int(pl_macros[imacro_from].members.size())); } else { - ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); + ClusterBlockId b_to = grid_blocks.block_at_location(to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -111,7 +117,7 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte outcome = e_block_move_result::INVERT; } else { // This is not a macro - I could use the from and to info from before - outcome = record_single_block_swap(blocks_affected, b_from, to); + outcome = record_single_block_swap(blocks_affected, b_from, to, blk_loc_registry); } } // Finish handling cases for blocks in macro and otherwise @@ -119,48 +125,49 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte return outcome; } -e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to) { +e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry) { /* Find all the blocks affected when b_from is swapped with b_to. * Returns abort_swap. 
*/ - VTR_ASSERT_SAFE(b_from); - auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); - if (place_ctx.block_locs[b_from].is_fixed) { + if (block_locs[b_from].is_fixed) { return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); + VTR_ASSERT_SAFE(to.sub_tile < int(grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); - ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); + ClusterBlockId b_to = grid_blocks.block_at_location(to); - t_pl_loc curr_from = place_ctx.block_locs[b_from].loc; + t_pl_loc curr_from = block_locs[b_from].loc; e_block_move_result outcome = e_block_move_result::VALID; // Check whether the to_location is empty - if (b_to == EMPTY_BLOCK_ID) { + if (b_to == ClusterBlockId::INVALID()) { // Sets up the blocks moved - outcome = blocks_affected.record_block_move(b_from, to); - - } else if (b_to != INVALID_BLOCK_ID) { + outcome = blocks_affected.record_block_move(b_from, to, blk_loc_registry); + } else { // Check whether block to is compatible with from location - if (b_to != EMPTY_BLOCK_ID && b_to != INVALID_BLOCK_ID) { - if (!(is_legal_swap_to_location(b_to, curr_from)) || place_ctx.block_locs[b_to].is_fixed) { - return e_block_move_result::ABORT; - } + if (!(is_legal_swap_to_location(b_to, curr_from, blk_loc_registry)) || block_locs[b_to].is_fixed) { + return e_block_move_result::ABORT; } + // Sets up the blocks moved - outcome = blocks_affected.record_block_move(b_from, to); + outcome = blocks_affected.record_block_move(b_from, to, blk_loc_registry); if (outcome != e_block_move_result::VALID) { return outcome; } - - t_pl_loc from = place_ctx.block_locs[b_from].loc; - outcome = blocks_affected.record_block_move(b_to, from); + + t_pl_loc from = block_locs[b_from].loc; + outcome = blocks_affected.record_block_move(b_to, from, blk_loc_registry); } // Finish swapping the blocks and setting up blocks_affected @@ -170,9 +177,14 @@ e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_aff //Records all the block movements required to move the macro imacro_from starting at member imember_from //to a new position offset from its current position by swap_offset. The new location may be a //single (non-macro) block, or another macro. 
-e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, t_pl_offset swap_offset) { - auto& place_ctx = g_vpr_ctx.placement(); - auto& pl_macros = place_ctx.pl_macros; +e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, + const int imacro_from, + int& imember_from, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry) { + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); e_block_move_result outcome = e_block_move_result::VALID; @@ -181,7 +193,7 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, // cannot use the old from and to info ClusterBlockId curr_b_from = pl_macros[imacro_from].members[imember_from].blk_index; - t_pl_loc curr_from = place_ctx.block_locs[curr_b_from].loc; + t_pl_loc curr_from = block_locs[curr_b_from].loc; t_pl_loc curr_to = curr_from + swap_offset; @@ -191,12 +203,12 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, // * match the correct block type // //Note that we need to explicitly check that the types match, since the device floorplan is not - //(neccessarily) translationally invariant for an arbitrary macro - if (!is_legal_swap_to_location(curr_b_from, curr_to)) { + //(necessarily) translationally invariant for an arbitrary macro + if (!is_legal_swap_to_location(curr_b_from, curr_to, blk_loc_registry)) { log_move_abort("macro_from swap to location illegal"); outcome = e_block_move_result::ABORT; } else { - ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(curr_to); + ClusterBlockId b_to = grid_blocks.block_at_location(curr_to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -204,11 +216,11 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, //To block is a macro if (imacro_from == imacro_to) { - outcome = record_macro_self_swaps(blocks_affected, imacro_from, swap_offset); + outcome = record_macro_self_swaps(blocks_affected, imacro_from, swap_offset, blk_loc_registry); imember_from = pl_macros[imacro_from].members.size(); break; //record_macro_self_swaps() handles this case completely, so we don't need to continue the loop } else { - outcome = record_macro_macro_swaps(blocks_affected, imacro_from, imember_from, imacro_to, b_to, swap_offset); + outcome = record_macro_macro_swaps(blocks_affected, imacro_from, imember_from, imacro_to, b_to, swap_offset, blk_loc_registry); if (outcome == e_block_move_result::INVERT_VALID) { break; //The move was inverted and successfully proposed, don't need to continue the loop } @@ -216,7 +228,7 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, } } else { //To block is not a macro - outcome = record_single_block_swap(blocks_affected, curr_b_from, curr_to); + outcome = record_single_block_swap(blocks_affected, curr_b_from, curr_to, blk_loc_registry); } } } // Finish going through all the blocks in the macro @@ -226,7 +238,13 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, //Records all the block movements required to move the macro imacro_from starting at member imember_from //to a new position offset from its current position by swap_offset. The new location must be where //blk_to is located and blk_to must be part of imacro_to. 
-e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, const int imacro_to, ClusterBlockId blk_to, t_pl_offset swap_offset) { +e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, + const int imacro_from, + int& imember_from, + const int imacro_to, + ClusterBlockId blk_to, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry) { //Adds the macro imacro_to to the set of affected blocks caused by swapping 'blk_to' to its //new position. // @@ -234,16 +252,17 @@ e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_aff //The position in the from macro ('imacro_from') is specified by 'imember_from', and the relevant //macro for the 'to' block is 'imacro_to'. - auto& place_ctx = g_vpr_ctx.placement(); + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); //At the moment, we only support blk_to being the first element of the 'to' macro. // //For instance, this means that we can swap two carry chains so long as one starts //below the other (not a big limitation since swapping in the opposite direction //allows these blocks to swap) - if (place_ctx.pl_macros[imacro_to].members[0].blk_index != blk_to) { + if (pl_macros[imacro_to].members[0].blk_index != blk_to) { int imember_to = 0; - auto outcome = record_macro_swaps(blocks_affected, imacro_to, imember_to, -swap_offset); + auto outcome = record_macro_swaps(blocks_affected, imacro_to, imember_to, -swap_offset, blk_loc_registry); if (outcome == e_block_move_result::INVERT) { log_move_abort("invert recursion2"); outcome = e_block_move_result::ABORT; @@ -254,8 +273,8 @@ e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_aff //From/To blocks should be exactly the swap offset apart - ClusterBlockId blk_from = place_ctx.pl_macros[imacro_from].members[imember_from].blk_index; - VTR_ASSERT_SAFE(place_ctx.block_locs[blk_from].loc + swap_offset == place_ctx.block_locs[blk_to].loc); + ClusterBlockId blk_from = pl_macros[imacro_from].members[imember_from].blk_index; + VTR_ASSERT_SAFE(block_locs[blk_from].loc + swap_offset == block_locs[blk_to].loc); //Continue walking along the overlapping parts of the from and to macros, recording //each block swap.
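For orientation, the calling convention this refactor establishes looks like the following: callers pass the placer-owned BlkLocRegistry explicitly instead of letting the move code reach into g_vpr_ctx.placement(). A minimal sketch, assuming a PlacerState that exposes blk_loc_registry() and using an illustrative target location (none of this is code from the patch itself):

    // Hedged sketch: proposing a single-block move through the explicit-state API.
    const BlkLocRegistry& registry = placer_state.blk_loc_registry();
    t_pl_blocks_to_be_moved blocks_affected(g_vpr_ctx.clustering().clb_nlist.blocks().size());
    ClusterBlockId b_from = pick_from_block();                   // any movable block
    t_pl_loc to(/*x=*/3, /*y=*/5, /*sub_tile=*/0, /*layer=*/0);  // illustrative target
    if (create_move(blocks_affected, b_from, to, registry) == e_create_move::ABORT) {
        blocks_affected.clear_move_blocks();                     // discard the partial move
    }

Because every record_* helper below receives the same registry, a half-built macro move can be rolled back without consulting or mutating any global placement state.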
@@ -272,47 +291,47 @@ e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_aff // //NOTE: We mutate imember_from so the outer from macro walking loop moves in lock-step int imember_to = 0; - t_pl_offset from_to_macro_offset = place_ctx.pl_macros[imacro_from].members[imember_from].offset; - for (; imember_from < int(place_ctx.pl_macros[imacro_from].members.size()) && imember_to < int(place_ctx.pl_macros[imacro_to].members.size()); + t_pl_offset from_to_macro_offset = pl_macros[imacro_from].members[imember_from].offset; + for (; imember_from < int(pl_macros[imacro_from].members.size()) && imember_to < int(pl_macros[imacro_to].members.size()); ++imember_from, ++imember_to) { //Check that both macros have the same shape while they overlap - if (place_ctx.pl_macros[imacro_from].members[imember_from].offset != place_ctx.pl_macros[imacro_to].members[imember_to].offset + from_to_macro_offset) { + if (pl_macros[imacro_from].members[imember_from].offset != pl_macros[imacro_to].members[imember_to].offset + from_to_macro_offset) { log_move_abort("macro shapes disagree"); return e_block_move_result::ABORT; } - ClusterBlockId b_from = place_ctx.pl_macros[imacro_from].members[imember_from].blk_index; + ClusterBlockId b_from = pl_macros[imacro_from].members[imember_from].blk_index; - t_pl_loc curr_to = place_ctx.block_locs[b_from].loc + swap_offset; - t_pl_loc curr_from = place_ctx.block_locs[b_from].loc; + t_pl_loc curr_to = block_locs[b_from].loc + swap_offset; + t_pl_loc curr_from = block_locs[b_from].loc; - ClusterBlockId b_to = place_ctx.pl_macros[imacro_to].members[imember_to].blk_index; - VTR_ASSERT_SAFE(curr_to == place_ctx.block_locs[b_to].loc); + ClusterBlockId b_to = pl_macros[imacro_to].members[imember_to].blk_index; + VTR_ASSERT_SAFE(curr_to == block_locs[b_to].loc); // Check whether block to is compatible with from location - if (b_to != EMPTY_BLOCK_ID && b_to != INVALID_BLOCK_ID) { - if (!(is_legal_swap_to_location(b_to, curr_from))) { + if (b_to != ClusterBlockId::INVALID()) { + if (!(is_legal_swap_to_location(b_to, curr_from, blk_loc_registry))) { return e_block_move_result::ABORT; } } - if (!is_legal_swap_to_location(b_from, curr_to)) { + if (!is_legal_swap_to_location(b_from, curr_to, blk_loc_registry)) { log_move_abort("macro_from swap to location illegal"); return e_block_move_result::ABORT; } - auto outcome = record_single_block_swap(blocks_affected, b_from, curr_to); + auto outcome = record_single_block_swap(blocks_affected, b_from, curr_to, blk_loc_registry); if (outcome != e_block_move_result::VALID) { return outcome; } } - if (imember_to < int(place_ctx.pl_macros[imacro_to].members.size())) { + if (imember_to < int(pl_macros[imacro_to].members.size())) { //The to macro extends beyond the from macro. // //Swap the remainder of the 'to' macro to locations after the 'from' macro. //Note that we are swapping in the opposite direction so the swap offsets are inverted. 
- return record_macro_swaps(blocks_affected, imacro_to, imember_to, -swap_offset); + return record_macro_swaps(blocks_affected, imacro_to, imember_to, -swap_offset, blk_loc_registry); } return e_block_move_result::VALID; @@ -327,25 +346,28 @@ e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_aff e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, std::vector<ClusterBlockId>& displaced_blocks, const int imacro, - t_pl_offset swap_offset) { - auto& place_ctx = g_vpr_ctx.placement(); + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry) { + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); - for (const t_pl_macro_member& member : place_ctx.pl_macros[imacro].members) { - t_pl_loc from = place_ctx.block_locs[member.blk_index].loc; + for (const t_pl_macro_member& member : pl_macros[imacro].members) { + t_pl_loc from = block_locs[member.blk_index].loc; t_pl_loc to = from + swap_offset; - if (!is_legal_swap_to_location(member.blk_index, to)) { + if (!is_legal_swap_to_location(member.blk_index, to, blk_loc_registry)) { log_move_abort("macro move to location illegal"); return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); + ClusterBlockId blk_to = grid_blocks.block_at_location(to); - blocks_affected.record_block_move(member.blk_index, to); + blocks_affected.record_block_move(member.blk_index, to, blk_loc_registry); int imacro_to = -1; - get_imacro_from_iblk(&imacro_to, blk_to, place_ctx.pl_macros); + get_imacro_from_iblk(&imacro_to, blk_to, pl_macros); if (blk_to && imacro_to != imacro) { //Block displaced only if exists and not part of current macro displaced_blocks.push_back(blk_to); } @@ -356,31 +378,37 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, //Returns the set of macros affected by moving imacro by the specified offset // //The resulting 'macros' may contain duplicates -e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& macros, const int imacro, t_pl_offset swap_offset) { +e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& macros, + const int imacro, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry) { + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); + e_block_move_result outcome = e_block_move_result::VALID; - auto& place_ctx = g_vpr_ctx.placement(); - for (size_t imember = 0; imember < place_ctx.pl_macros[imacro].members.size() && outcome == e_block_move_result::VALID; ++imember) { - ClusterBlockId blk = place_ctx.pl_macros[imacro].members[imember].blk_index; + for (size_t imember = 0; imember < pl_macros[imacro].members.size() && outcome == e_block_move_result::VALID; ++imember) { + ClusterBlockId blk = pl_macros[imacro].members[imember].blk_index; - t_pl_loc from = place_ctx.block_locs[blk].loc; + t_pl_loc from = block_locs[blk].loc; t_pl_loc to = from + swap_offset; - if (!is_legal_swap_to_location(blk, to)) { + if (!is_legal_swap_to_location(blk, to, blk_loc_registry)) { log_move_abort("macro move to location illegal"); return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); + ClusterBlockId blk_to = grid_blocks.block_at_location(to); int imacro_to = -1; -
get_imacro_from_iblk(&imacro_to, blk_to, place_ctx.pl_macros); + get_imacro_from_iblk(&imacro_to, blk_to, pl_macros); if (imacro_to != -1) { auto itr = std::find(macros.begin(), macros.end(), imacro_to); if (itr == macros.end()) { macros.push_back(imacro_to); - outcome = identify_macro_self_swap_affected_macros(macros, imacro_to, swap_offset); + outcome = identify_macro_self_swap_affected_macros(macros, imacro_to, swap_offset, blk_loc_registry); } } } @@ -389,16 +417,16 @@ e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& m e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, - t_pl_offset swap_offset) { - auto& place_ctx = g_vpr_ctx.placement(); + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry) { + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; //Reset any partial move blocks_affected.clear_move_blocks(); //Collect the macros affected std::vector<int> affected_macros; - auto outcome = identify_macro_self_swap_affected_macros(affected_macros, imacro, - swap_offset); + auto outcome = identify_macro_self_swap_affected_macros(affected_macros, imacro, swap_offset, blk_loc_registry); if (outcome != e_block_move_result::VALID) { return outcome; @@ -411,7 +439,7 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe //Move all the affected macros by the offset for (int imacro_affected : affected_macros) { - outcome = record_macro_move(blocks_affected, displaced_blocks, imacro_affected, swap_offset); + outcome = record_macro_move(blocks_affected, displaced_blocks, imacro_affected, swap_offset, blk_loc_registry); if (outcome != e_block_move_result::VALID) { return outcome; @@ -420,7 +448,7 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe auto is_non_macro_block = [&](ClusterBlockId blk) { int imacro_blk = -1; - get_imacro_from_iblk(&imacro_blk, blk, place_ctx.pl_macros); + get_imacro_from_iblk(&imacro_blk, blk, pl_macros); if (std::find(affected_macros.begin(), affected_macros.end(), imacro_blk) != affected_macros.end()) { return false; @@ -438,26 +466,29 @@ e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affe //Fit the displaced blocks into the empty locations auto loc_itr = empty_locs.begin(); - for (auto blk : non_macro_displaced_blocks) { - outcome = blocks_affected.record_block_move(blk, *loc_itr); + for (ClusterBlockId blk : non_macro_displaced_blocks) { + outcome = blocks_affected.record_block_move(blk, *loc_itr, blk_loc_registry); ++loc_itr; } return outcome; } -bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { +bool is_legal_swap_to_location(ClusterBlockId blk, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry) { //Make sure that the swap_to location is valid //It must be: // * on chip, and // * match the correct block type // //Note that we need to explicitly check that the types match, since the device floorplan is not - //(neccessarily) translationally invariant for an arbitrary macro + //(necessarily) translationally invariant for an arbitrary macro + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = blk_loc_registry.block_locs(); + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); if (to.x < 0 || to.x >= int(device_ctx.grid.width()) || to.y < 0 || to.y >=
int(device_ctx.grid.height()) @@ -474,9 +505,9 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return false; } // If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks.block_at_location(to); - if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { - if (place_ctx.block_locs[b_to].is_fixed) { + ClusterBlockId b_to = grid_blocks.block_at_location(to); + if (b_to) { + if (block_locs[b_to].is_fixed) { return false; } } @@ -534,25 +565,18 @@ void enable_placer_debug(const t_placer_opts& placer_opts, } #endif -#ifdef VTR_ENABLE_DEBUG_LOGGING ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, bool highly_crit_block, ClusterNetId* net_from, - int* pin_from) { -#else -ClusterBlockId propose_block_to_move(const t_placer_opts& /* placer_opts */, - int& logical_blk_type_index, - bool highly_crit_block, - ClusterNetId* net_from, - int* pin_from) { -#endif + int* pin_from, + const PlacerState& placer_state) { ClusterBlockId b_from = ClusterBlockId::INVALID(); auto& cluster_ctx = g_vpr_ctx.clustering(); if (logical_blk_type_index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from); + b_from = pick_from_highly_critical_block(*net_from, *pin_from, placer_state); } else { b_from = pick_from_block(); } @@ -563,13 +587,15 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& /* placer_opts */, } } else { //If the block type is specified, choose a random block with blk_type to be swapped with another random block if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index); + b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state); } else { b_from = pick_from_block(logical_blk_type_index); } } #ifdef VTR_ENABLE_DEBUG_LOGGING enable_placer_debug(placer_opts, b_from); +#else + (void)placer_opts; #endif return b_from; @@ -620,10 +646,12 @@ ClusterBlockId pick_from_block(const int logical_blk_type_index) { //Pick a random highly critical block to be swapped with another random block. //If none is found return ClusterBlockId::INVALID() -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from) { - auto& place_move_ctx = g_placer_ctx.move(); - auto& place_ctx = g_vpr_ctx.placement(); +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, + int& pin_from, + const PlacerState& placer_state) { auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = placer_state.move(); + auto& block_locs = placer_state.block_locs(); //Initialize critical net and pin to be invalid net_from = ClusterNetId::INVALID(); @@ -638,7 +666,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ std::pair<ClusterNetId, int> crit_pin = place_move_ctx.highly_crit_pins[vtr::irand(place_move_ctx.highly_crit_pins.size() - 1)]; ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first); - if (place_ctx.block_locs[b_from].is_fixed) { + if (block_locs[b_from].is_fixed) { return ClusterBlockId::INVALID(); //Block is fixed, cannot move } @@ -652,10 +680,13 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ //Pick a random highly critical block with a specified block type to be swapped with another random block.
//If none is found return ClusterBlockId::INVALID() -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const int logical_blk_type_index) { - auto& place_move_ctx = g_placer_ctx.move(); - auto& place_ctx = g_vpr_ctx.placement(); +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, + int& pin_from, + const int logical_blk_type_index, + const PlacerState& placer_state) { auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = placer_state.move(); + auto& block_locs = placer_state.block_locs(); //Initialize critical net and pin to be invalid net_from = ClusterNetId::INVALID(); @@ -674,7 +705,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ //blk_type from propose move doesn't account for the EMPTY type auto b_from_type = cluster_ctx.clb_nlist.block_type(b_from); if (b_from_type->index == logical_blk_type_index) { - if (place_ctx.block_locs[b_from].is_fixed) { + if (block_locs[b_from].is_fixed) { return ClusterBlockId::INVALID(); //Block is fixed, cannot move } @@ -690,9 +721,10 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ bool find_to_loc_uniform(t_logical_block_type_ptr type, float rlim, - const t_pl_loc from, + const t_pl_loc& from, t_pl_loc& to, - ClusterBlockId b_from) { + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry) { //Finds a legal swap to location for the given type, starting from 'from.x' and 'from.y' // //Note that the range limit (rlim) is applied in a logical sense (i.e. 'compressed' grid space consisting @@ -739,9 +771,10 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, compressed_locs[to_layer_num], search_range, to_compressed_loc, - false, + /*is_median=*/false, to_layer_num, - false); + /*search_for_empty=*/false, + blk_loc_registry); if (!legal) { //No valid position found @@ -780,7 +813,8 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_pl_loc& from_loc, const t_bb* limit_coords, t_pl_loc& to_loc, - ClusterBlockId b_from) { + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry) { int num_layers = g_vpr_ctx.device().grid.get_num_layers(); const int to_layer_num = to_loc.layer; VTR_ASSERT(to_layer_num != OPEN); @@ -836,9 +870,10 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, from_compressed_locs[to_layer_num], search_range, to_compressed_loc, - true, + /*is_median=*/true, to_layer_num, - false); + /*search_for_empty=*/false, + blk_loc_registry); if (!legal) { //No valid position found @@ -869,7 +904,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, const t_pl_loc& centroid, const t_range_limiters& range_limiters, t_pl_loc& to_loc, - ClusterBlockId b_from) { + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; const int to_layer_num = centroid.layer; @@ -922,9 +958,10 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, from_compressed_loc[to_layer_num], search_range, to_compressed_loc, - false, + /*is_median=*/false, to_layer_num, - false); + /*search_for_empty=*/false, + blk_loc_registry); if (!legal) { //No valid position found @@ -985,9 +1022,9 @@ void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, } int find_empty_compatible_subtile(t_logical_block_type_ptr type, - const t_physical_tile_loc& to_loc) { + const t_physical_tile_loc& to_loc, + const 
GridBlock& grid_blocks) { auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.placement(); const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; int return_sub_tile = -1; @@ -995,10 +1032,11 @@ int find_empty_compatible_subtile(t_logical_block_type_ptr type, t_pl_loc to_uncompressed_loc; compressed_grid_to_loc(type, to_loc, to_uncompressed_loc); const t_physical_tile_loc to_phy_uncompressed_loc{to_uncompressed_loc.x, to_uncompressed_loc.y, to_uncompressed_loc.layer}; - const auto& phy_type = device_ctx.grid.get_physical_type(to_phy_uncompressed_loc); + const t_physical_tile_type_ptr phy_type = device_ctx.grid.get_physical_type(to_phy_uncompressed_loc); const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(phy_type->index); - for (const auto& sub_tile : compatible_sub_tiles) { - if (place_ctx.grid_blocks.is_sub_tile_empty(to_phy_uncompressed_loc, sub_tile)) { + + for (const int sub_tile : compatible_sub_tiles) { + if (grid_blocks.is_sub_tile_empty(to_phy_uncompressed_loc, sub_tile)) { return_sub_tile = sub_tile; break; } @@ -1014,7 +1052,8 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, t_physical_tile_loc& to_loc, bool is_median, int to_layer_num, - bool search_for_empty) { + bool search_for_empty, + const BlkLocRegistry& blk_loc_registry) { //TODO For the time being, the blocks only moved in the same layer. This assertion should be removed after VPR is updated to move blocks between layers VTR_ASSERT(to_layer_num == from_loc.layer_num); const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; @@ -1097,7 +1136,7 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, if (from_loc.x == to_loc.x && from_loc.y == to_loc.y && from_loc.layer_num == to_layer_num) { continue; //Same from/to location -- try again for new y-position } else if (search_for_empty) { // Check if the location has at least one empty sub-tile - legal = find_empty_compatible_subtile(type, to_loc) >= 0; + legal = find_empty_compatible_subtile(type, to_loc, blk_loc_registry.grid_blocks()) >= 0; } else { legal = true; } @@ -1254,20 +1293,23 @@ std::string e_move_result_to_string(e_move_result move_outcome) { return move_result_to_string[move_outcome]; } -int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc) { +int find_free_layer(t_logical_block_type_ptr logical_block, + const t_pl_loc& loc, + const BlkLocRegistry& blk_loc_registry) { const auto& device_ctx = g_vpr_ctx.device(); - const auto& place_ctx = g_vpr_ctx.placement(); + const auto& compressed_grids = g_vpr_ctx.placement().compressed_block_grids; + const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); // TODO: Compatible layer vector should be shuffled first, and then iterated through int free_layer = loc.layer; VTR_ASSERT(loc.layer != OPEN); if (device_ctx.grid.get_num_layers() > 1) { - const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums(); + const auto& compatible_layers = compressed_grids[logical_block->index].get_layer_nums(); if (compatible_layers.size() > 1) { - if (place_ctx.grid_blocks.block_at_location(loc) != EMPTY_BLOCK_ID) { + if (grid_blocks.block_at_location(loc)) { for (const auto& layer : compatible_layers) { if (layer != free_layer) { - if (place_ctx.grid_blocks.block_at_location(loc) == EMPTY_BLOCK_ID) { + if (grid_blocks.block_at_location(loc) == ClusterBlockId::INVALID()) { 
free_layer = layer; break; } } } } return free_layer; } diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 3bb70438eae..80359dd07a2 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -1,9 +1,13 @@ #ifndef VPR_MOVE_UTILS_H #define VPR_MOVE_UTILS_H + #include "vpr_types.h" #include "move_transactions.h" #include "compressed_grid.h" +class PlacerState; +class BlkLocRegistry; + /* Cut off for incremental bounding box updates. * * 4 is fastest -- I checked. */ /* To turn off incremental bounding box updates, set this to a huge value */ @@ -87,13 +91,28 @@ struct t_range_limiters { float dm_rlim; }; +/** + * These variables keep track of the number of swaps + * rejected, accepted or aborted. The total number of swap attempts + * is the sum of the three numbers. + */ +struct t_swap_stats { + int num_swap_rejected = 0; + int num_swap_accepted = 0; + int num_swap_aborted = 0; + int num_ts_called = 0; +}; + //Records the reason for an aborted move void log_move_abort(std::string_view reason); //Prints a brief report about aborted move reasons and counts void report_aborted_moves(); -e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); +e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry); /** * @brief Find the blocks that will be affected by a move of b_from to to_loc @@ -103,19 +122,45 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock * @return e_block_move_result ABORT if either of the moving blocks are already stored, or either of the blocks are fixed, to location is not * compatible, etc. INVERT if the "from" block is a single block and the "to" block is a macro. VALID otherwise.
*/ -e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); - -e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to); - -e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, t_pl_offset swap_offset); -e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro_from, int& imember_from, const int imacro_to, ClusterBlockId blk_to, t_pl_offset swap_offset); +e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry); + +e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, + ClusterBlockId b_from, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry); + +e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, + const int imacro_from, + int& imember_from, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry); + +e_block_move_result record_macro_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, + const int imacro_from, + int& imember_from, + const int imacro_to, + ClusterBlockId blk_to, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry); e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, std::vector<ClusterBlockId>& displaced_blocks, const int imacro, - t_pl_offset swap_offset); -e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& macros, const int imacro, t_pl_offset swap_offset); -e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, t_pl_offset swap_offset); + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry); + +e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& macros, + const int imacro, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry); + +e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, + const int imacro, + t_pl_offset swap_offset, + const BlkLocRegistry& blk_loc_registry); /** * @brief Check whether the "to" location is legal for the given "blk" * @param to * @return True if this would be a legal move, false otherwise */ -bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); +bool is_legal_swap_to_location(ClusterBlockId blk, + t_pl_loc to, + const BlkLocRegistry& blk_loc_registry); /** * @brief Propose block for the RL agent based on required block type. @@ -139,7 +186,8 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, bool highly_crit_block, ClusterNetId* net_from, - int* pin_from); + int* pin_from, + const PlacerState& placer_state); /** * Returns all movable clustered blocks with a specified logical block type.
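A pattern worth calling out across these hunks: occupancy checks drop the old EMPTY_BLOCK_ID / INVALID_BLOCK_ID sentinel pair in favor of ClusterBlockId's explicit bool conversion. A minimal sketch of the new idiom (loc is an illustrative location, not from this patch):

    ClusterBlockId b_to = grid_blocks.block_at_location(loc);
    if (b_to) {
        // Occupied: b_to names a real block; check is_fixed, type compatibility, etc.
    } else {
        // Empty: b_to == ClusterBlockId::INVALID(), the only remaining "no block" state.
    }

Collapsing the two sentinels into one removes the double comparisons (b_to != EMPTY_BLOCK_ID && b_to != INVALID_BLOCK_ID) that this patch deletes throughout move_utils.cpp.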
@@ -169,7 +217,9 @@ ClusterBlockId pick_from_block(int logical_blk_type_index); * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found */ -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from); +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, + int& pin_from, + const PlacerState& placer_state); /** * @brief Find a block with a specific block type to be swapped with another block * @@ -178,13 +228,17 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found */ -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, int logical_blk_type_index); +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, + int& pin_from, + int logical_blk_type_index, + const PlacerState& placer_state); bool find_to_loc_uniform(t_logical_block_type_ptr type, float rlim, - const t_pl_loc from, + const t_pl_loc& from, t_pl_loc& to, - ClusterBlockId b_from); + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry); // Accessor f_placer_breakpoint_reached // return true when a placer breakpoint is reached @@ -202,12 +256,19 @@ void set_placer_breakpoint_reached(bool); * if it was able to find a compatible location and false otherwise. * It is similar to find_to_loc_uniform but searching in a defined range instead of searching in a range around the current block location. * - * @param blk_type: the type of the moving block - * @param from_loc: the original location of the moving block - * @param limit_coords: the region where I can move the block to - * @param to_loc: the new location that the function picked for the block + * @param blk_type the type of the moving block + * @param from_loc the original location of the moving block + * @param limit_coords the region where I can move the block to + * @param to_loc the new location that the function picked for the block + * @param b_from The unique ID of the clustered block whose median location is to be computed. + * @param blk_loc_registry Information about clustered block locations. */ -bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_pl_loc& from_loc, const t_bb* limit_coords, t_pl_loc& to_loc, ClusterBlockId b_from); +bool find_to_loc_median(t_logical_block_type_ptr blk_type, + const t_pl_loc& from_loc, + const t_bb* limit_coords, + t_pl_loc& to_loc, + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry); /** * @brief Find a legal swap to location for the given type in a range around a specific location. @@ -228,7 +289,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, const t_pl_loc& centeroid, const t_range_limiters& range_limiters, t_pl_loc& to_loc, - ClusterBlockId b_from); + ClusterBlockId b_from, + const BlkLocRegistry& blk_loc_registry); const std::string& move_type_to_string(e_move_type); @@ -253,12 +315,14 @@ void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, * * @param type logical block type * @param to_loc The location to be checked + * @param grid_blocks A mapping from grid locations to clustered blocks placed there. * * @return int The subtile number if there is an empty compatible subtile, otherwise -1 * is returned to indicate that there are no empty subtiles compatible with the given type.
*/ int find_empty_compatible_subtile(t_logical_block_type_ptr type, - const t_physical_tile_loc& to_loc); + const t_physical_tile_loc& to_loc, + const GridBlock& grid_blocks); /** * @brief find compressed location in a compressed range for a specific type in the given layer (to_layer_num) @@ -278,7 +342,8 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, t_physical_tile_loc& to_loc, bool is_median, int to_layer_num, - bool search_for_empty); + bool search_for_empty, + const BlkLocRegistry& blk_loc_registry); /** * @brief Get the compressed loc from the uncompressed loc (grid_loc) @@ -366,12 +431,10 @@ std::string e_move_result_to_string(e_move_result move_outcome); * @brief Iterate over all layers that have a physical tile at the x-y location specified by "loc" that can accommodate "logical_block". * If the location in the layer specified by "layer_num" is empty, return that layer. Otherwise, * return a layer that is not occupied at that location. If there isn't any, again, return the layer of loc. - * - * @param logical_block - * @param loc - * @return */ -int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc); +int find_free_layer(t_logical_block_type_ptr logical_block, + const t_pl_loc& loc, + const BlkLocRegistry& blk_loc_registry); int get_random_layer(t_logical_block_type_ptr logical_block); diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 53aed32dea1..97384c5f1e8 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -24,15 +24,19 @@ * @date July 12, 2024 */ #include "net_cost_handler.h" + #include "clustered_netlist_fwd.h" #include "globals.h" #include "physical_types.h" -#include "placer_globals.h" +#include "placer_state.h" #include "move_utils.h" #include "place_timing_update.h" #include "noc_place_utils.h" #include "vtr_math.h" +#include <optional> +#include <functional> + using std::max; using std::min; @@ -46,13 +50,7 @@ enum class NetUpdateState { GOT_FROM_SCRATCH }; -/** - * @brief The error tolerance due to round off for the total cost computation. * When we check it from scratch vs. incrementally. 0.01 means that there is a 1% error tolerance. */ -#define ERROR_TOL .01 - -const int MAX_FANOUT_CROSSING_COUNT = 50; +static constexpr int MAX_FANOUT_CROSSING_COUNT = 50; /** * @brief Crossing counts for nets with different #'s of pins. From @@ -155,6 +153,12 @@ static struct TSInfo ts_info; static BBUpdater bb_updater; +static std::optional<std::reference_wrapper<PlacerState>> placer_state_ref; + +void set_net_handlers_placer_state(PlacerState& placer_state) { + placer_state_ref = std::ref(placer_state); +} + /** * @param net * @param moved_blocks */ @@ -171,9 +175,9 @@ static bool driven_by_moved_block(const ClusterNetId net, * @param blk_pin * @param pl_moved_block */ -static void update_net_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, +static void update_net_bb(const ClusterNetId net, + const ClusterBlockId blk, + const ClusterPinId blk_pin, const t_pl_moved_block& pl_moved_block); /** @@ -208,9 +212,9 @@ static void record_affected_net(const ClusterNetId net); * @param place_algorithm Placement algorithm * @param delay_model Timing delay model used by placer * @param criticalities Connections timing criticalities - * @param blk_id Block ID of that the moving pin blongs to. + * @param blk_id Block ID that the moving pin belongs to.
* @param pin_id Pin ID of the moving pin - * @param moving_blk_inf Data structure that holds information, e.g., old location and new locatoin, about all moving blocks + * @param moving_blk_inf Data structure that holds information, e.g., old location and new location, about all moving blocks * @param affected_pins Netlist pins which are affected, in terms of placement cost, by the proposed move. * @param timing_delta_c Timing cost change based on the proposed move * @param is_src_moving Is the moving pin the source of a net. @@ -545,7 +549,7 @@ double BBUpdater::get_net_cost(const ClusterNetId net_id) { } void BBUpdater::set_ts_bb_coord(const ClusterNetId net_id) { - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& place_move_ctx = placer_state_ref->get().mutable_move(); if (m_cube_bb) { place_move_ctx.bb_coords[net_id] = ts_info.ts_bb_coord_new[net_id]; } else { @@ -554,7 +558,7 @@ void BBUpdater::set_ts_bb_coord(const ClusterNetId net_id) { } void BBUpdater::set_ts_edge(const ClusterNetId net_id) { - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& place_move_ctx = placer_state_ref->get().mutable_move(); if (m_cube_bb) { place_move_ctx.bb_num_on_edges[net_id] = ts_info.ts_bb_edge_new[net_id]; } else { @@ -568,8 +572,7 @@ static bool driven_by_moved_block(const ClusterNetId net, const std::vector<t_pl_moved_block>& moved_blocks) { auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; bool is_driven_by_move_blk = false; - ClusterBlockId net_driver_block = clb_nlist.net_driver_block( - net); + ClusterBlockId net_driver_block = clb_nlist.net_driver_block(net); for (const auto& block : moved_blocks) { if (net_driver_block == block.block_num) { @@ -581,11 +584,13 @@ static bool driven_by_moved_block(const ClusterNetId net, return is_driven_by_move_blk; } -static void update_net_bb(const ClusterNetId& net, - const ClusterBlockId& blk, - const ClusterPinId& blk_pin, +static void update_net_bb(const ClusterNetId net, + const ClusterBlockId blk, + const ClusterPinId blk_pin, const t_pl_moved_block& pl_moved_block) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& placer_state = placer_state_ref->get(); + const auto& block_locs = placer_state.block_locs(); if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { //For small nets brute-force bounding box update is faster if (bb_updater.get_cube_bb()) { } } else { //For large nets, update bounding box incrementally - int iblk_pin = tile_pin_index(blk_pin); + int iblk_pin = placer_state.blk_loc_registry().tile_pin_index(blk_pin); - t_physical_tile_type_ptr blk_type = physical_tile_type(blk); + t_pl_loc block_loc = block_locs[blk].loc; + t_physical_tile_type_ptr blk_type = physical_tile_type(block_loc); int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; bool is_driver = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER; @@ -647,19 +653,20 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, * for incremental static timing analysis (incremental STA).
*/ auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& placer_state = placer_state_ref->get(); + auto& block_locs = placer_state.block_locs(); - const auto& connection_delay = g_placer_ctx.timing().connection_delay; - auto& connection_timing_cost = g_placer_ctx.mutable_timing().connection_timing_cost; - auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay; - auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost; + const auto& connection_delay = placer_state.timing().connection_delay; + auto& connection_timing_cost = placer_state.mutable_timing().connection_timing_cost; + auto& proposed_connection_delay = placer_state.mutable_timing().proposed_connection_delay; + auto& proposed_connection_timing_cost = placer_state.mutable_timing().proposed_connection_timing_cost; if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { /* This pin is a net driver on a moved block. */ /* Recompute all point to point connection delays for the net sinks. */ for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); ipin++) { - float temp_delay = comp_td_single_connection_delay(delay_model, net, - ipin); + float temp_delay = comp_td_single_connection_delay(delay_model, block_locs, net, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { continue; @@ -685,8 +692,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, /* Get the sink pin index in the net */ int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); - float temp_delay = comp_td_single_connection_delay(delay_model, net, - ipin); + float temp_delay = comp_td_single_connection_delay(delay_model, block_locs, net, ipin); /* If the delay hasn't changed, do not mark this pin as affected */ if (temp_delay == connection_delay[net][ipin]) { return; @@ -696,8 +702,7 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, proposed_connection_delay[net][ipin] = temp_delay; proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; - delta_timing_cost += proposed_connection_timing_cost[net][ipin] - - connection_timing_cost[net][ipin]; + delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; /* Record this connection in blocks_affected.affected_pins */ affected_pins.push_back(pin); @@ -760,42 +765,37 @@ static void get_non_updatable_bb(ClusterNetId net_id, t_bb& bb_coord_new, vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) { //TODO: account for multiple physical pin instances per logical pin - - int xmax, ymax, layer_max, xmin, ymin, layer_min, x, y, layer; - int pnum; - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); + auto& placer_state = placer_state_ref->get(); + auto& block_locs = placer_state.block_locs(); ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - pnum = net_pin_to_tile_pin_index(net_id, 0); + int pnum = placer_state.blk_loc_registry().net_pin_to_tile_pin_index(net_id, 0); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - layer = place_ctx.block_locs[bnum].loc.layer; + t_pl_loc block_loc = block_locs[bnum].loc; + int x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_loc.y +
physical_tile_type(block_loc)->pin_height_offset[pnum]; + int layer = block_loc.layer; - xmin = x; - ymin = y; - layer_min = layer; - xmax = x; - ymax = y; - layer_max = layer; + int xmin = x; + int ymin = y; + int layer_min = layer; + int xmax = x; + int ymax = y; + int layer_max = layer; for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { num_sink_pin_layer[layer_num] = 0; } - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - layer = place_ctx.block_locs[bnum].loc.layer; + block_loc = block_locs[bnum].loc; + pnum = placer_state.blk_loc_registry().tile_pin_index(pin_id); + x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + layer = block_loc.layer; if (x < xmin) { xmin = x; @@ -838,40 +838,36 @@ static void get_non_updatable_layer_bb(ClusterNetId net_id, std::vector<t_2D_bb>& bb_coord_new, vtr::NdMatrixProxy<int, 1> num_sink_layer) { //TODO: account for multiple physical pin instances per logical pin - auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& placer_state = placer_state_ref->get(); + auto& block_locs = placer_state.block_locs(); + int num_layers = device_ctx.grid.get_num_layers(); for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { num_sink_layer[layer_num] = 0; } - int pnum; - - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - pnum = net_pin_to_tile_pin_index(net_id, 0); + t_pl_loc block_loc = block_locs[bnum].loc; + int pnum = placer_state.blk_loc_registry().net_pin_to_tile_pin_index(net_id, 0); - int src_x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - int src_y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; + int src_x = block_locs[bnum].loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int src_y = block_locs[bnum].loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; std::vector<int> xmin(num_layers, src_x); std::vector<int> ymin(num_layers, src_y); std::vector<int> xmax(num_layers, src_x); std::vector<int> ymax(num_layers, src_y); - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); - int x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - int y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; + block_loc = block_locs[bnum].loc; + pnum = placer_state.blk_loc_registry().tile_pin_index(pin_id); + int x = block_locs[bnum].loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_locs[bnum].loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; - int layer_num = place_ctx.block_locs[bnum].loc.layer; + int layer_num = block_locs[bnum].loc.layer; num_sink_layer[layer_num]++; if (x < xmin[layer_num]) { xmin[layer_num] = x; @@ -913,7 +909,8 @@ static void
update_bb(ClusterNetId net_id, const t_bb *curr_bb_edge, *curr_bb_coord; auto& device_ctx = g_vpr_ctx.device(); - auto& place_move_ctx = g_placer_ctx.move(); + auto& placer_state = placer_state_ref->get(); + auto& place_move_ctx = placer_state.move(); const int num_layers = device_ctx.grid.get_num_layers(); @@ -1182,7 +1179,8 @@ static void update_layer_bb(ClusterNetId net_id, t_physical_tile_loc pin_new_loc, bool is_output_pin) { auto& device_ctx = g_vpr_ctx.device(); - auto& place_move_ctx = g_placer_ctx.move(); + auto& placer_state = placer_state_ref->get(); + auto& place_move_ctx = placer_state.move(); pin_new_loc.x = max(min(pin_new_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels pin_new_loc.y = max(min(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels @@ -1524,53 +1522,49 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges, vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) { - int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax, layer_min, layer_max; - int xmin_edge, xmax_edge, ymin_edge, ymax_edge, layer_min_edge, layer_max_edge; - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; + const auto& placer_state = placer_state_ref->get(); + auto& block_locs = placer_state.block_locs(); ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - pnum = net_pin_to_tile_pin_index(net_id, 0); + t_pl_loc block_loc = block_locs[bnum].loc; + int pnum = placer_state.blk_loc_registry().net_pin_to_tile_pin_index(net_id, 0); VTR_ASSERT_SAFE(pnum >= 0); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - pin_layer = place_ctx.block_locs[bnum].loc.layer; + int x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + int pin_layer = block_loc.layer; x = max(min(x, grid.width() - 2), 1); y = max(min(y, grid.height() - 2), 1); pin_layer = max(min(pin_layer, grid.get_num_layers() - 1), 0); - xmin = x; - ymin = y; - layer_min = pin_layer; - xmax = x; - ymax = y; - layer_max = pin_layer; + int xmin = x; + int ymin = y; + int layer_min = pin_layer; + int xmax = x; + int ymax = y; + int layer_max = pin_layer; - xmin_edge = 1; - ymin_edge = 1; - layer_min_edge = 1; - xmax_edge = 1; - ymax_edge = 1; - layer_max_edge = 1; + int xmin_edge = 1; + int ymin_edge = 1; + int layer_min_edge = 1; + int xmax_edge = 1; + int ymax_edge = 1; + int layer_max_edge = 1; for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { num_sink_pin_layer[layer_num] = 0; } - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - pnum = tile_pin_index(pin_id); - x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; - pin_layer = place_ctx.block_locs[bnum].loc.layer; + block_loc = block_locs[bnum].loc; + pnum = placer_state.blk_loc_registry().tile_pin_index(pin_id); + x = block_locs[bnum].loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + y = block_locs[bnum].loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + pin_layer =
block_locs[bnum].loc.layer; /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. * * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and * @@ -1649,6 +1643,11 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, std::vector<t_2D_bb>& coords, vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) { auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& grid = device_ctx.grid; + auto& placer_state = placer_state_ref->get(); + auto& block_locs = placer_state.block_locs(); + const int num_layers = device_ctx.grid.get_num_layers(); std::vector<int> xmin(num_layers, OPEN); std::vector<int> xmax(num_layers, OPEN); @@ -1661,17 +1660,13 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, std::vector<int> num_sink_pin_layer(num_layers, 0); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& grid = device_ctx.grid; ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); - int pnum_src = net_pin_to_tile_pin_index(net_id, 0); + t_pl_loc block_loc = block_locs[bnum].loc; + int pnum_src = placer_state.blk_loc_registry().net_pin_to_tile_pin_index(net_id, 0); VTR_ASSERT_SAFE(pnum_src >= 0); - int x_src = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum_src]; - int y_src = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum_src]; + int x_src = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum_src]; + int y_src = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum_src]; x_src = max(min(x_src, grid.width() - 2), 1); y_src = max(min(y_src, grid.height() - 2), 1); @@ -1690,16 +1685,15 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, ymax_edge[layer_num] = 1; } - for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - int pnum = tile_pin_index(pin_id); - int layer = place_ctx.block_locs[bnum].loc.layer; + block_loc = block_locs[bnum].loc; + int pnum = placer_state.blk_loc_registry().tile_pin_index(pin_id); + int layer = block_locs[bnum].loc.layer; VTR_ASSERT_SAFE(layer >= 0 && layer < num_layers); num_sink_pin_layer[layer]++; - int x = place_ctx.block_locs[bnum].loc.x - + physical_tile_type(bnum)->pin_width_offset[pnum]; - int y = place_ctx.block_locs[bnum].loc.y - + physical_tile_type(bnum)->pin_height_offset[pnum]; + int x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. * * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and * @@ -1759,12 +1753,9 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id, static double get_net_cost(ClusterNetId net_id, const t_bb& bb) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ - - double ncost, crossing; auto& cluster_ctx = g_vpr_ctx.clustering(); - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); + double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size()); /* Could insert a check for xmin == xmax. In that case, assume * * connection will be made with no bends and hence no x-cost.
* @@ -1773,11 +1764,9 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bb) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ - ncost = (bb.xmax - bb.xmin + 1) * crossing - * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; - - ncost += (bb.ymax - bb.ymin + 1) * crossing - * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; + double ncost; + ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; + ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; return (ncost); } @@ -1822,11 +1811,9 @@ static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */, } static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { - double ncost, crossing; auto& cluster_ctx = g_vpr_ctx.clustering(); - crossing = wirelength_crossing_count( - cluster_ctx.clb_nlist.net_pins(net_id).size()); + double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size()); /* Could insert a check for xmin == xmax. In that case, assume * * connection will be made with no bends and hence no x-cost. * @@ -1835,11 +1822,12 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ + double ncost; ncost = (bb.xmax - bb.xmin + 1) * crossing; ncost += (bb.ymax - bb.ymin + 1) * crossing; - return (ncost); + return ncost; } static double get_net_wirelength_from_layer_bb(ClusterNetId /* net_id */, @@ -1958,14 +1946,14 @@ double comp_bb_cost(e_cost_methods method) { double cost = 0; double expected_wirelength = 0.0; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& placer_state = placer_state_ref->get(); + auto& place_move_ctx = placer_state.mutable_move(); for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ /* Small nets don't use incremental updating on their bounding boxes, * * so they can use a fast bounding box calculator. */ - if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET - && method == NORMAL) { + if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_bb_from_scratch(net_id, place_move_ctx.bb_coords[net_id], place_move_ctx.bb_num_on_edges[net_id], @@ -1978,12 +1966,12 @@ double comp_bb_cost(e_cost_methods method) { pl_net_cost.net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]); cost += pl_net_cost.net_cost[net_id]; - if (method == CHECK) + if (method == e_cost_methods::CHECK) expected_wirelength += get_net_wirelength_estimate(net_id, place_move_ctx.bb_coords[net_id]); } } - if (method == CHECK) { + if (method == e_cost_methods::CHECK) { VTR_LOG("\n"); VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n", expected_wirelength); @@ -1995,14 +1983,14 @@ double comp_layer_bb_cost(e_cost_methods method) { double cost = 0; double expected_wirelength = 0.0; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& placer_state = placer_state_ref->get(); + auto& place_move_ctx = placer_state.mutable_move(); for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. 
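The reflowed ncost expressions above keep one simple shape: half-perimeter span along each axis, scaled by a pin-count-dependent crossing factor and a channel-capacity factor. A sketch of that shape follows; chan_fac_x and chan_fac_y stand in for the chanx_place_cost_fac/chany_place_cost_fac lookups, and crossing_count() is only an illustrative monotone ramp, not the empirically derived table behind wirelength_crossing_count().

    #include <cstddef>

    struct BBox { int xmin, ymin, xmax, ymax; };

    // Placeholder for wirelength_crossing_count(): nets with more pins tend
    // to cross the bounding-box perimeter more often, so the half-perimeter
    // estimate is scaled up. Only the monotone shape is meaningful here.
    double crossing_count(std::size_t num_pins) {
        if (num_pins <= 3) return 1.0;
        return 1.0 + 0.03 * static_cast<double>(num_pins - 3);
    }

    // Same structure as get_net_cost(): span along each axis, scaled by the
    // crossing factor and a per-axis channel-capacity factor.
    double net_cost(const BBox& bb, std::size_t num_pins,
                    double chan_fac_x, double chan_fac_y) {
        double crossing = crossing_count(num_pins);
        double ncost = (bb.xmax - bb.xmin + 1) * crossing * chan_fac_x;
        ncost += (bb.ymax - bb.ymin + 1) * crossing * chan_fac_y;
        return ncost;
    }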
*/ /* Small nets don't use incremental updating on their bounding boxes, * * so they can use a fast bounding box calculator. */ - if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET - && method == NORMAL) { + if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_layer_bb_from_scratch(net_id, place_move_ctx.layer_bb_num_on_edges[net_id], place_move_ctx.layer_bb_coords[net_id], @@ -2017,14 +2005,14 @@ double comp_layer_bb_cost(e_cost_methods method) { place_move_ctx.layer_bb_coords[net_id], place_move_ctx.num_sink_pin_layer[size_t(net_id)]); cost += pl_net_cost.net_cost[net_id]; - if (method == CHECK) + if (method == e_cost_methods::CHECK) expected_wirelength += get_net_wirelength_from_layer_bb(net_id, place_move_ctx.layer_bb_coords[net_id], place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } } - if (method == CHECK) { + if (method == e_cost_methods::CHECK) { VTR_LOG("\n"); VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n", expected_wirelength); @@ -2035,7 +2023,8 @@ double comp_layer_bb_cost(e_cost_methods method) { void update_move_nets() { /* update net cost functions and reset flags. */ auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& placer_state = placer_state_ref->get(); + auto& place_move_ctx = placer_state.mutable_move(); for (const ClusterNetId ts_net : ts_info.ts_nets_to_update) { ClusterNetId net_id = ts_net; @@ -2072,9 +2061,11 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_placer_costs* costs) { + auto& placer_state = placer_state_ref->get(); + auto check_and_print_cost = [](double new_cost, double old_cost, - const std::string& cost_name) { + const std::string& cost_name) -> void { if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) { std::string msg = vtr::string_fmt( "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n", @@ -2089,7 +2080,7 @@ void recompute_costs_from_scratch(const t_placer_opts& placer_opts, if (placer_opts.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; - comp_td_costs(delay_model, *criticalities, &new_timing_cost); + comp_td_costs(delay_model, *criticalities, placer_state, &new_timing_cost); check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost"); costs->timing_cost = new_timing_cost; } else { @@ -2163,8 +2154,7 @@ void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) { for (size_t high = 1; high < device_ctx.grid.height(); high++) { chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high]; for (size_t low = 0; low < high; low++) { - chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] - + device_ctx.chan_width.x_list[high]; + chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low] + device_ctx.chan_width.x_list[high]; } } @@ -2203,8 +2193,7 @@ void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) { for (size_t high = 1; high < device_ctx.grid.width(); high++) { chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high]; for (size_t low = 0; low < high; low++) { - chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] - + device_ctx.chan_width.y_list[high]; + chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low] + device_ctx.chan_width.y_list[high]; } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h 
b/vpr/src/place/net_cost_handler.h
index 0e72611b323..8ab0af8e554 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -4,15 +4,23 @@
 #include "move_transactions.h"
 #include "place_util.h"

+class PlacerState;
+
 /**
- * @brief The method used to calculate palcement cost
- * @details For comp_cost. NORMAL means use the method that generates updateable bounding boxes for speed.
+ * @brief The error tolerance due to round-off for the total cost computation,
+ * used when the incrementally updated cost is checked against a from-scratch recomputation.
+ * 0.01 means that there is a 1% error tolerance.
+ */
+constexpr double ERROR_TOL = .01;
+
+/**
+ * @brief The method used to calculate placement cost
+ * @details For comp_cost. NORMAL means use the method that generates updatable bounding boxes for speed.
 * CHECK means compute all bounding boxes from scratch using a very simple routine to allow checks
 * of the other costs.
 * NORMAL: Compute cost efficiently using incremental techniques.
 * CHECK: Brute-force cost computation; useful to validate the more complex incremental cost update code.
 */
-enum e_cost_methods {
+enum class e_cost_methods {
     NORMAL,
     CHECK
 };
@@ -147,3 +155,5 @@ void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb);
 * @brief Free (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update data structures.
 */
 void free_try_swap_net_cost_structs();
+
+void set_net_handlers_placer_state(PlacerState& placer_state);
diff --git a/vpr/src/place/noc_place_checkpoint.cpp b/vpr/src/place/noc_place_checkpoint.cpp
index 3e11d9c8eb6..b23cc4cf4ed 100644
--- a/vpr/src/place/noc_place_checkpoint.cpp
+++ b/vpr/src/place/noc_place_checkpoint.cpp
@@ -13,38 +13,39 @@ NoCPlacementCheckpoint::NoCPlacementCheckpoint()
     router_locations_.clear();

     // Initializes checkpoint locations to invalid
-    for (const auto& router_bid : router_bids) {
+    for (const ClusterBlockId router_bid : router_bids) {
         router_locations_[router_bid] = t_pl_loc(OPEN, OPEN, OPEN, OPEN);
     }
 }

-void NoCPlacementCheckpoint::save_checkpoint(double cost) {
+void NoCPlacementCheckpoint::save_checkpoint(double cost, const vtr::vector_map& block_locs) {
     const auto& noc_ctx = g_vpr_ctx.noc();
-    const auto& place_ctx = g_vpr_ctx.placement();

     const std::vector& router_bids = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist();

-    for (const auto& router_bid : router_bids) {
-        t_pl_loc loc = place_ctx.block_locs[router_bid].loc;
+    for (const ClusterBlockId router_bid : router_bids) {
+        t_pl_loc loc = block_locs[router_bid].loc;
         router_locations_[router_bid] = loc;
     }

     valid_ = true;
     cost_ = cost;
 }

-void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs) {
+void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs,
+                                                BlkLocRegistry& blk_loc_registry) {
     const auto& noc_ctx = g_vpr_ctx.noc();
     const auto& device_ctx = g_vpr_ctx.device();
-    auto& place_ctx = g_vpr_ctx.mutable_placement();
+    GridBlock& grid_blocks = blk_loc_registry.mutable_grid_blocks();
+    const auto& block_locs = blk_loc_registry.block_locs();

     // Get all physical routers
     const auto& noc_phy_routers = noc_ctx.noc_model.get_noc_routers();

     // Clear all physical routers in placement
-    for (const auto& phy_router : noc_phy_routers) {
-        auto phy_loc = phy_router.get_router_physical_location();
+    for (const NocRouter& phy_router : noc_phy_routers) {
+        t_physical_tile_loc phy_loc = phy_router.get_router_physical_location();

-        place_ctx.grid_blocks.set_usage(phy_loc, 0);
+        grid_blocks.set_usage(phy_loc, 0);

         auto tile = device_ctx.grid.get_physical_type(phy_loc);

         for (const auto& sub_tile : tile->sub_tiles) {
@@ -52,23 +53,18 @@ void NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs) {
             for (int k = 0; k < capacity.total(); k++) {
                 const t_pl_loc loc(phy_loc, k + capacity.low);
-                if (place_ctx.grid_blocks.block_at_location(loc) != INVALID_BLOCK_ID) {
-                    place_ctx.grid_blocks.set_block_at_location(loc, EMPTY_BLOCK_ID);
-                }
+                grid_blocks.set_block_at_location(loc, ClusterBlockId::INVALID());
             }
         }
     }

     // Place routers based on router_locations_
-    for (const auto& router_loc : router_locations_) {
-        ClusterBlockId router_blk_id = router_loc.first;
-        t_pl_loc location = router_loc.second;
-
-        set_block_location(router_blk_id, location);
+    for (const auto& [router_blk_id, location] : router_locations_) {
+        blk_loc_registry.set_block_location(router_blk_id, location);
     }

     // Re-initialize routes and static variables that keep track of NoC-related costs
-    reinitialize_noc_routing(costs, {});
+    reinitialize_noc_routing(costs, {}, block_locs);
 }

 bool NoCPlacementCheckpoint::is_valid() const {
diff --git a/vpr/src/place/noc_place_checkpoint.h b/vpr/src/place/noc_place_checkpoint.h
index 11df0a50732..3eb631c273e 100644
--- a/vpr/src/place/noc_place_checkpoint.h
+++ b/vpr/src/place/noc_place_checkpoint.h
@@ -36,16 +36,19 @@ class NoCPlacementCheckpoint {
     /**
      * @brief Saves the current NoC router placement as a checkpoint
      *
-     * @param cost: The placement cost associated with the current placement
+     * @param cost The placement cost associated with the current placement
+     * @param block_locs Stores where each clustered block (including NoC routers) is placed.
      */
-    void save_checkpoint(double cost);
+    void save_checkpoint(double cost, const vtr::vector_map& block_locs);

     /**
-     * @brief Loads the save checkpoint into global placement data structues.
+     * @brief Loads the saved checkpoint into the global placement data structures.
      *
-     * @param costs: Used to load NoC related costs for the checkpoint
+     * @param costs Used to load NoC related costs for the checkpoint
+     * @param blk_loc_registry To be updated with the saved checkpoint of NoC router locations.
      */
-    void restore_checkpoint(t_placer_costs& costs);
+    void restore_checkpoint(t_placer_costs& costs,
+                            BlkLocRegistry& blk_loc_registry);

     /**
      * @brief Indicates whether the object is empty or it has already stored a
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index d8d9b481ec1..1ac01bfd5d0 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -44,7 +44,8 @@ static std::unordered_set affected_noc_links;
  */
 static bool select_random_router_cluster(ClusterBlockId& b_from,
                                          t_pl_loc& from,
-                                         t_logical_block_type_ptr& cluster_from_type);
+                                         t_logical_block_type_ptr& cluster_from_type,
+                                         const vtr::vector_map& block_locs);

 /**
  * @brief Given two traffic flow routes, finds links that appear
@@ -58,7 +59,8 @@ static bool select_random_router_cluster(ClusterBlockId& b_from,
 static std::vector find_affected_links_by_flow_reroute(std::vector& prev_links,
                                                        std::vector& curr_links);

-void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes) {
+void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes,
+                         const vtr::vector_map& block_locs) {
     // need to update the link usages within after routing all the traffic flows
     // also need to route all the traffic flows and store them
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
@@ -74,14 +76,14 @@ void initial_noc_routing(const vtr::vector
         const std::vector& curr_traffic_flow_route = new_traffic_flow_routes.empty()
-                                                         ?
route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router) + ? route_traffic_flow(traffic_flow_id, noc_ctx.noc_model, noc_traffic_flows_storage, *noc_ctx.noc_flows_router, block_locs) : new_traffic_flow_routes[traffic_flow_id]; if (!new_traffic_flow_routes.empty()) { @@ -94,7 +96,8 @@ void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes) { + const vtr::vector>& new_traffic_flow_routes, + const vtr::vector_map& block_locs) { // used to access NoC links and modify them auto& noc_ctx = g_vpr_ctx.mutable_noc(); @@ -107,7 +110,7 @@ void reinitialize_noc_routing(t_placer_costs& costs, } // Route traffic flows and update link bandwidth usage - initial_noc_routing(new_traffic_flow_routes); + initial_noc_routing(new_traffic_flow_routes, block_locs); // Initialize traffic_flow_costs costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); @@ -116,7 +119,8 @@ void reinitialize_noc_routing(t_placer_costs& costs, } void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, - NocCostTerms& delta_c) { + NocCostTerms& delta_c, + const vtr::vector_map& block_locs) { /* For speed, delta_c is passed by reference instead of being returned. * We expect delta cost terms to be zero to ensure correctness. */ @@ -142,7 +146,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move // check if the current moved block is a noc router if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) { // current block is a router, so re-route all the traffic flows it is a part of - re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows); + re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows, block_locs); } } @@ -157,8 +161,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move // calculate the new aggregate bandwidth and latency costs for the affected traffic flow proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow); std::tie(proposed_traffic_flow_costs[traffic_flow_id].latency, - proposed_traffic_flow_costs[traffic_flow_id].latency_overrun) - = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow); + proposed_traffic_flow_costs[traffic_flow_id].latency_overrun) = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow); // compute how much the aggregate bandwidth and latency costs change with this swap delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; @@ -204,10 +207,8 @@ void commit_noc_costs() { std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, - NocRouting& noc_flows_router) { - // provides the positions where the affected blocks have moved to - auto& place_ctx = g_vpr_ctx.placement(); - + NocRouting& noc_flows_router, + const vtr::vector_map& block_locs) { // get the traffic flow with the current id const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); @@ -216,8 +217,8 @@ std::vector& route_traffic_flow(NocTrafficFlowId 
traffic_flow_id, ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id; // get the ids of the hard router blocks where the logical router cluster blocks have been placed - NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_source_router_block_id].loc); - NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_sink_router_block_id].loc); + NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_source_router_block_id].loc); + NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_sink_router_block_id].loc); // route the current traffic flow std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id); @@ -247,7 +248,8 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, - std::unordered_set& updated_traffic_flows) { + std::unordered_set& updated_traffic_flows, + const vtr::vector_map& block_locs) { // get all the associated traffic flows for the logical router cluster block const auto& assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id); @@ -260,7 +262,7 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, std::vector prev_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers - re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router); + re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router, block_locs); // now make sure we don't update this traffic flow a second time by adding it to the group of updated traffic flows updated_traffic_flows.insert(traffic_flow_id); @@ -281,7 +283,8 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, } } -void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected) { +void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected, + const vtr::vector_map& block_locs) { auto& noc_ctx = g_vpr_ctx.mutable_noc(); NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; @@ -306,7 +309,7 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already. 
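route_traffic_flow() above resolves each flow endpoint by looking up the placed location of its logical router in block_locs, and re_route_associated_traffic_flows() guards against touching a flow twice when both of its endpoints moved in the same swap. A stripped-down sketch of that process-once guard, with FlowId standing in for NocTrafficFlowId:

    #include <functional>
    #include <unordered_set>
    #include <vector>

    using FlowId = int; // stand-in for NocTrafficFlowId

    // Re-route each affected flow exactly once per move: insert() returns
    // false for a flow that was already handled, so a flow shared by two
    // moved routers is not re-routed twice.
    void process_flows_once(const std::vector<FlowId>& affected_flows,
                            std::unordered_set<FlowId>& handled,
                            const std::function<void(FlowId)>& reroute) {
        for (FlowId flow : affected_flows) {
            if (handled.insert(flow).second) {
                reroute(flow);
            }
        }
    }

unordered_set::insert() reports through its bool whether the element was new, so the membership test and the insertion cost a single hash lookup.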
                if (reverted_traffic_flows.find(traffic_flow_id) == reverted_traffic_flows.end()) {
                    // Revert the traffic flow route by re-routing it
-                   re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router);
+                   re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, block_locs);

                    // make sure we do not revert this traffic flow again
                    reverted_traffic_flows.insert(traffic_flow_id);
@@ -319,7 +322,8 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect
 void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id,
                            NocTrafficFlows& noc_traffic_flows_storage,
                            NocStorage& noc_model,
-                           NocRouting& noc_flows_router) {
+                           NocRouting& noc_flows_router,
+                           const vtr::vector_map& block_locs) {
     // get the current traffic flow info
     const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);

@@ -332,7 +336,7 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id,
     update_traffic_flow_link_usage(curr_traffic_flow_route, noc_model, -1, curr_traffic_flow.traffic_flow_bandwidth);

     // now get the re-routed traffic flow route and increment all the link usages with this reverted route
-    std::vector& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router);
+    std::vector& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router, block_locs);

     update_traffic_flow_link_usage(re_routed_traffic_flow_route, noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth);
 }
@@ -451,13 +455,12 @@ double comp_noc_congestion_cost() {
     return congestion_cost;
 }

-int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
+int check_noc_placement_costs(const t_placer_costs& costs,
+                              double error_tolerance,
+                              const t_noc_opts& noc_opts,
+                              const vtr::vector_map& block_locs) {
     int error = 0;
-    NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0};
-
-    // get current router block locations
-    auto& place_ctx = g_vpr_ctx.placement();
-    const vtr::vector_map& placed_cluster_block_locations = place_ctx.block_locs;
+    NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0};

     auto& noc_ctx = g_vpr_ctx.noc();
     const NocStorage& noc_model = noc_ctx.noc_model;
@@ -485,8 +488,8 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id;

         // get the ids of the hard router blocks where the logical router cluster blocks have been placed
-        NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_source_router_block_id].loc);
-        NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_sink_router_block_id].loc);
+        NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_source_router_block_id].loc);
+        NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(block_locs[logical_sink_router_block_id].loc);

         // route the current traffic flow
         temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, traffic_flow_id, temp_found_noc_route, noc_model);
@@ -500,7 +503,7 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
         cost_check.latency_overrun += curr_traffic_flow_latency_overrun_cost;

        //
increase bandwidth utilization for the links that constitute the current flow's route - for (auto& link_id : temp_found_noc_route) { + for (NocLinkId link_id : temp_found_noc_route) { auto& link = temp_noc_link_storage[link_id]; double curr_link_bw_util = link.get_bandwidth_usage(); link.set_bandwidth_usage(curr_link_bw_util + curr_traffic_flow.traffic_flow_bandwidth); @@ -657,7 +660,16 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, * is computed. Weighting factors determine the contribution of each * normalized term to the sum. */ - cost = noc_opts.noc_placement_weighting * (cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth * noc_opts.noc_aggregate_bandwidth_weighting + cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_weighting + cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting + cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting); + // clang-format off + cost = + noc_opts.noc_placement_weighting * + ( + cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth * noc_opts.noc_aggregate_bandwidth_weighting + + cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_weighting + + cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting + + cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting + ); + // clang-format on return cost; } @@ -780,12 +792,12 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) { return (vtr::irand(99) < user_supplied_noc_router_swap_percentage); } -static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type) { +static bool select_random_router_cluster(ClusterBlockId& b_from, + t_pl_loc& from, + t_logical_block_type_ptr& cluster_from_type, + const vtr::vector_map& block_locs) { // need to access all the router cluster blocks in the design auto& noc_ctx = g_vpr_ctx.noc(); - // - auto& place_ctx = g_vpr_ctx.placement(); - // auto& cluster_ctx = g_vpr_ctx.clustering(); // get a reference to the collection of router cluster blocks in the design @@ -803,11 +815,11 @@ static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, b_from = router_clusters[random_cluster_block_index]; //check if the block is movable - if (place_ctx.block_locs[b_from].is_fixed) { + if (block_locs[b_from].is_fixed) { return false; } - from = place_ctx.block_locs[b_from].loc; + from = block_locs[b_from].loc; cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); @@ -815,17 +827,18 @@ static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, return true; } -e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, float rlim) { +e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, + float rlim, + const BlkLocRegistry& blk_loc_registry) { // block ID for the randomly selected router cluster ClusterBlockId b_from; // current location of the randomly selected router cluster t_pl_loc from; // logical block type of the randomly selected router cluster t_logical_block_type_ptr cluster_from_type; - bool random_select_success = false; // Randomly select a router cluster - random_select_success = select_random_router_cluster(b_from, from, 
cluster_from_type); + bool random_select_success = select_random_router_cluster(b_from, from, cluster_from_type, blk_loc_registry.block_locs()); // If a random router cluster could not be selected, no move can be proposed if (!random_select_success) { @@ -835,11 +848,11 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa // now choose a compatible block to swap with t_pl_loc to; to.layer = from.layer; - if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { + if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -849,11 +862,10 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa return create_move; } -void write_noc_placement_file(const std::string& file_name) { +void write_noc_placement_file(const std::string& file_name, + const vtr::vector_map& block_locs) { // we need the clustered netlist to get the names of all the NoC router cluster blocks auto& cluster_ctx = g_vpr_ctx.clustering(); - // we need to the placement context to determine the final placed locations of the NoC router cluster blocks - auto& placement_ctx = g_vpr_ctx.placement(); // we need the NoC context to identify the physical router ids based on their locations on the device auto& noc_ctx = g_vpr_ctx.noc(); @@ -879,9 +891,6 @@ void write_noc_placement_file(const std::string& file_name) { // get a reference to the clustered netlist const ClusteredNetlist& cluster_block_netlist = cluster_ctx.clb_nlist; - // get a reference to the clustered block placement locations - const vtr::vector_map& clustered_block_placed_locations = placement_ctx.block_locs; - // go through all the cluster blocks and write out their information in the placement file for (const auto& single_cluster_id : router_clusters) { // check if the current cluster id is valid @@ -894,7 +903,7 @@ void write_noc_placement_file(const std::string& file_name) { const std::string& cluster_name = cluster_block_netlist.block_name(single_cluster_id); //get the placement location of the current router cluster block - const t_block_loc& cluster_location = clustered_block_placed_locations[single_cluster_id]; + const t_block_loc& cluster_location = block_locs[single_cluster_id]; // now get the corresponding physical router block id the cluster is located on NocRouterId physical_router_cluster_is_placed_on = noc_model.get_router_at_grid_location(cluster_location.loc); @@ -907,25 +916,25 @@ void write_noc_placement_file(const std::string& file_name) { noc_placement_file.close(); } -bool noc_routing_has_cycle() { +bool noc_routing_has_cycle(const vtr::vector_map& block_locs) { // used to access traffic flow routes const auto& noc_ctx = g_vpr_ctx.noc(); // get all traffic flow routes const auto& traffic_flow_routes = noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_routes(); - bool has_cycle = noc_routing_has_cycle(traffic_flow_routes); + bool has_cycle = noc_routing_has_cycle(traffic_flow_routes, block_locs); return has_cycle; } -bool noc_routing_has_cycle(const vtr::vector>& routes) { +bool noc_routing_has_cycle(const vtr::vector>& routes, + const vtr::vector_map& block_locs) { const auto& noc_ctx = 
g_vpr_ctx.noc(); - const auto& place_ctx = g_vpr_ctx.placement(); ChannelDependencyGraph channel_dependency_graph(noc_ctx.noc_model, noc_ctx.noc_traffic_flows_storage, routes, - place_ctx.block_locs); + block_locs); bool has_cycles = channel_dependency_graph.has_cycles(); diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h index a0e675ea7d7..99199d7b021 100644 --- a/vpr/src/place/noc_place_utils.h +++ b/vpr/src/place/noc_place_utils.h @@ -53,8 +53,10 @@ struct TrafficFlowPlaceCost { * * @param new_traffic_flow_routes Traffic flow routes used to initialize link bandwidth utilization. * If an empty vector is passed, this function uses a routing algorithm to route traffic flows. + * @param block_locs Contains the location where each clustered block is placed at. */ -void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes); +void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes, + const vtr::vector_map& block_locs); /** * @brief Re-initializes all link bandwidth usages by either re-routing @@ -73,10 +75,12 @@ void initial_noc_routing(const vtr::vector>& new_traffic_flow_routes); + const vtr::vector>& new_traffic_flow_routes, + const vtr::vector_map& block_locs); /** * @brief Goes through all the cluster blocks that were moved @@ -105,14 +109,14 @@ void reinitialize_noc_routing(t_placer_costs& costs, * the moved blocks, their previous locations and their new locations * after being moved. * @param noc_aggregate_bandwidth_delta_c The change in the overall - * NoC aggregate bandwidth cost caused by a placer move is stored - * here. + * NoC aggregate bandwidth cost caused by a placer move is stored here. * @param noc_latency_delta_c The change in the overall - * NoC latency cost caused by a placer move is stored - * here. + * NoC latency cost caused by a placer move is stored here. + * @param block_locs Contains the location where each clustered block is placed at. */ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, - NocCostTerms& delta_c); + NocCostTerms& delta_c, + const vtr::vector_map& block_locs); /** * @brief Updates static datastructures found in 'noc_place_utils.cpp' @@ -157,12 +161,14 @@ void commit_noc_costs(); * within the NoC. Used to get the current traffic flow information. * @param noc_flows_router The packet routing algorithm used to route traffic * flows within the NoC. + * @param block_locs Contains the location where each clustered block is placed at. * @return std::vector& The found route for the traffic flow. */ std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, - NocRouting& noc_flows_router); + NocRouting& noc_flows_router, + const vtr::vector_map& block_locs); /** * @brief Updates the bandwidth usages of links found in a routed traffic flow. @@ -212,12 +218,14 @@ void update_traffic_flow_link_usage(const std::vector& traffic_flow_r * flows within the NoC. * @param updated_traffic_flows Keeps track of traffic flows that have been * re-routed. Used to prevent re-routing the same traffic flow multiple times. + * @param block_locs Contains the location where each clustered block is placed at. 
*/ void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, - std::unordered_set& updated_traffic_flows); + std::unordered_set& updated_traffic_flows, + const vtr::vector_map& block_locs); /** * @brief Used to re-route all the traffic flows associated to logical @@ -231,8 +239,10 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, * the current placement iteration. This includes the cluster ids of * the moved blocks, their previous locations and their new locations * after being moved. + * @param block_locs Contains the location where each clustered block is placed at. */ -void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected); +void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected, + const vtr::vector_map& block_locs); /** * @brief Removes the route of a traffic flow and updates the links to indicate @@ -247,11 +257,13 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect * to route traffic flows within the NoC. * @param noc_flows_router The packet routing algorithm used to route traffic * flows within the NoC. + * @param block_locs Contains the location where each clustered block is placed at. */ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, - NocRouting& noc_flows_router); + NocRouting& noc_flows_router, + const vtr::vector_map& block_locs); /** * @brief Recompute the NoC costs (aggregate bandwidth and latency) by @@ -355,8 +367,12 @@ double comp_noc_congestion_cost(); * indicates that the current NoC costs are within the error tolerance and * a non-zero values indicates the current NoC costs are above the error * tolerance. + * @param block_locs Contains the location where each clustered block is placed at. */ -int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts); +int check_noc_placement_costs(const t_placer_costs& costs, + double error_tolerance, + const t_noc_opts& noc_opts, + const vtr::vector_map& block_locs); /** * @brief Determines the aggregate bandwidth cost of a routed traffic flow. @@ -526,7 +542,9 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage); * cluster block can travel (this is within the compressed block space) * @return e_create_move Result of proposing the move */ -e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, float rlim); +e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, + float rlim, + const BlkLocRegistry& blk_loc_registry); /** * @brief Writes out the locations of the router cluster blocks in the @@ -542,9 +560,10 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa * * @param file_name The name of the output file that contain the NoC placement * information. - * + * @param block_locs Contains the location where each clustered block is placed at. */ -void write_noc_placement_file(const std::string& file_name); +void write_noc_placement_file(const std::string& file_name, + const vtr::vector_map& block_locs); /** * @brief This function checks whether the routing configuration for NoC traffic flows @@ -557,17 +576,21 @@ void write_noc_placement_file(const std::string& file_name); * the graph has any back edges, i.e. 
whether a node points to one of its ancestors * during depth-first search traversal. * + * @param block_locs Contains the location where each clustered block is placed at. + * * @return bool Indicates whether NoC traffic flow routes form a cycle. */ -bool noc_routing_has_cycle(); +bool noc_routing_has_cycle(const vtr::vector_map& block_locs); /** * @brief Check if the channel dependency graph created from the given traffic flow routes * has any cycles. * @param routes The user provided traffic flow routes. + * @param block_locs Contains the location where each clustered block is placed at. * @return True if there is any cycles in the channel dependency graph. */ -bool noc_routing_has_cycle(const vtr::vector>& routes); +bool noc_routing_has_cycle(const vtr::vector>& routes, + const vtr::vector_map& block_locs); /** * @brief Invokes NoC SAT router and print new NoC cost terms after SAT router diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index b0134fd31f5..2cd55402d47 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -24,7 +24,6 @@ #include "globals.h" #include "place.h" -#include "placer_globals.h" #include "read_place.h" #include "draw.h" #include "place_and_route.h" @@ -70,6 +69,7 @@ #include "noc_place_utils.h" #include "net_cost_handler.h" +#include "placer_state.h" /* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * * compared to the timing cost in the agent's reward function. The reward is calculated as * @@ -88,11 +88,6 @@ using std::max; using std::min; /************** Types and defines local to place.c ***************************/ - -/* This defines the error tolerance for floating points variables used in * - * cost computation. 0.01 means that there is a 1% error tolerance. */ -static constexpr double ERROR_TOL = .01; - /* This defines the maximum number of swap attempts before invoking the * * once-in-a-while placement legality check as well as floating point * * variables round-offs check. */ @@ -103,13 +98,6 @@ constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); /********************** Variables local to place.c ***************************/ -/* These file-scoped variables keep track of the number of swaps * - * rejected, accepted or aborted. The total number of swap attempts * - * is the sum of the three number. 
*/ -static int num_swap_rejected = 0; -static int num_swap_accepted = 0; -static int num_swap_aborted = 0; -static int num_ts_called = 0; std::unique_ptr f_move_stats_file(nullptr, vtr::fclose); @@ -204,12 +192,13 @@ static void alloc_and_load_placement_structs(float place_cost_exp, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, t_direct_inf* directs, - int num_directs); + int num_directs, + PlacerState& placer_state); static void alloc_and_load_try_swap_structs(const bool cube_bb); static void free_try_swap_structs(); -static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); +static void free_placement_structs(const t_noc_opts& noc_opts); static e_move_result try_swap(const t_annealing_state* state, t_placer_costs* costs, @@ -226,22 +215,26 @@ static e_move_result try_swap(const t_annealing_state* state, MoveTypeStat& move_type_stat, const t_place_algorithm& place_algorithm, float timing_bb_factor, - bool manual_move_enabled); + bool manual_move_enabled, + t_swap_stats& swap_stats, + PlacerState& placer_state); static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts); + const t_noc_opts& noc_opts, + PlacerState& placer_state); static int check_placement_costs(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm); + const t_place_algorithm& place_algorithm, + PlacerState& placer_state); -static int check_placement_consistency(); -static int check_block_placement_consistency(); -static int check_macro_placement_consistency(); +static int check_placement_consistency(const BlkLocRegistry& blk_loc_registry); +static int check_block_placement_consistency(const BlkLocRegistry& blk_loc_registry); +static int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_registry); static float starting_t(const t_annealing_state* state, t_placer_costs* costs, @@ -256,20 +249,25 @@ static float starting_t(const t_annealing_state* state, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat); + MoveTypeStat& move_type_stat, + t_swap_stats& swap_stats, + PlacerState& placer_state); static int count_connections(); -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); +static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerState& placer_state); -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); +static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerTimingContext& p_timing_ctx); static void invalidate_affected_connections( const t_pl_blocks_to_be_moved& blocks_affected, NetPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state); static e_move_result assess_swap(double delta_c, double t); @@ -289,7 +287,8 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info); + SetupTimingInfo* timing_info, + PlacerState& placer_state); static void 
placement_inner_loop(const t_annealing_state* state, const t_placer_opts& placer_opts, @@ -308,21 +307,23 @@ static void placement_inner_loop(const t_annealing_state* state, SetupTimingInfo* timing_info, const t_place_algorithm& place_algorithm, MoveTypeStat& move_type_stat, - float timing_bb_factor); + float timing_bb_factor, + t_swap_stats& swap_stats, + PlacerState& placer_state); static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, const PlacementDelayCalculator& delay_calc, - bool is_flat); + bool is_flat, + const BlkLocRegistry& blk_loc_registry); //calculate the agent's reward and the total process outcome -static void calculate_reward_and_process_outcome( - const t_placer_opts& placer_opts, - const MoveOutcomeStats& move_outcome_stats, - const double& delta_c, - float timing_bb_factor, - MoveGenerator& move_generator); +static void calculate_reward_and_process_outcome(const t_placer_opts& placer_opts, + const MoveOutcomeStats& move_outcome_stats, + double delta_c, + float timing_bb_factor, + MoveGenerator& move_generator); static void print_place_status_header(bool noc_enabled); @@ -336,12 +337,18 @@ static void print_place_status(const t_annealing_state& state, bool noc_enabled, const NocCostTerms& noc_cost_terms); -static void print_resources_utilization(); +static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); -static void print_placement_swaps_stats(const t_annealing_state& state); +static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); static void print_placement_move_types_stats(const MoveTypeStat& move_type_stat); +/** + * @brief Copies the placement location variables into the global placement context. + * @param blk_loc_registry The placement location variables to be copied. 
+ */ +static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry); + /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -369,17 +376,12 @@ void try_place(const Netlist<>& net_list, auto& device_ctx = g_vpr_ctx.device(); auto& atom_ctx = g_vpr_ctx.atom(); auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); - - const auto& p_timing_ctx = g_placer_ctx.timing(); - const auto& p_runtime_ctx = g_placer_ctx.runtime(); auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - int tot_iter, moves_since_cost_recompute, num_connections, - outer_crit_iter_count, inner_recompute_limit; - float first_crit_exponent, first_rlim, first_t; - int first_move_lim; + int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count; + float first_crit_exponent; + t_placer_costs costs(placer_opts.place_algorithm); @@ -395,24 +397,14 @@ void try_place(const Netlist<>& net_list, std::shared_ptr timing_info; std::shared_ptr placement_delay_calc; std::unique_ptr place_delay_model; - std::unique_ptr move_generator; - std::unique_ptr move_generator2; - std::unique_ptr manual_move_generator; std::unique_ptr placer_setup_slacks; - std::unique_ptr placer_criticalities; std::unique_ptr pin_timing_invalidator; - manual_move_generator = std::make_unique(); - - t_pl_blocks_to_be_moved blocks_affected( - net_list.blocks().size()); + t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size()); - /* init file scope variables */ - num_swap_rejected = 0; - num_swap_accepted = 0; - num_swap_aborted = 0; - num_ts_called = 0; + // Swap statistics keep record of the number accepted/rejected/aborted swaps. + t_swap_stats swap_stats; if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ @@ -432,19 +424,26 @@ void try_place(const Netlist<>& net_list, } } - g_vpr_ctx.mutable_placement().cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode, - device_ctx.rr_graph); - const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + g_vpr_ctx.mutable_placement().cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode, device_ctx.rr_graph); + const bool cube_bb = g_vpr_ctx.placement().cube_bb; VTR_LOG("\n"); VTR_LOG("Bounding box mode is %s\n", (cube_bb ? 
"Cube" : "Per-layer")); VTR_LOG("\n"); - int move_lim = 1; - move_lim = (int)(annealing_sched.inner_num - * pow(net_list.blocks().size(), 1.3333)); + int move_lim = (int)(annealing_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); + + PlacerState placer_state; + auto& place_move_ctx = placer_state.mutable_move(); + auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); + const auto& p_timing_ctx = placer_state.timing(); + const auto& p_runtime_ctx = placer_state.runtime(); + + + alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs, placer_state); + set_net_handlers_placer_state(placer_state); - alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs); + std::unique_ptr manual_move_generator = std::make_unique(placer_state); vtr::ScopedStartFinishTimer timer("Placement"); @@ -452,17 +451,13 @@ void try_place(const Netlist<>& net_list, normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } - initial_placement(placer_opts, - placer_opts.constraints_file.c_str(), - noc_opts); + initial_placement(placer_opts, placer_opts.constraints_file.c_str(), noc_opts, blk_loc_registry); //create the move generator based on the chosen strategy - create_move_generators(move_generator, move_generator2, placer_opts, move_lim, noc_opts.noc_centroid_weight); + auto [move_generator, move_generator2] = create_move_generators(placer_state, placer_opts, move_lim, noc_opts.noc_centroid_weight); if (!placer_opts.write_initial_place_file.empty()) { - print_place(nullptr, - nullptr, - placer_opts.write_initial_place_file.c_str()); + print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state.block_locs()); } #ifdef ENABLE_ANALYTIC_PLACE @@ -473,33 +468,32 @@ void try_place(const Netlist<>& net_list, * Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench */ if (placer_opts.enable_analytic_placer) { - AnalyticPlacer{}.ap_place(); + AnalyticPlacer{blk_loc_registry}.ap_place(); } #endif /* ENABLE_ANALYTIC_PLACE */ // Update physical pin values - for (auto block_id : cluster_ctx.clb_nlist.blocks()) { - place_sync_external_block_connections(block_id); + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + blk_loc_registry.place_sync_external_block_connections(block_id); } const int width_fac = placer_opts.place_chan_width; - init_draw_coords((float)width_fac); + init_draw_coords((float)width_fac, blk_loc_registry); /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); //Enables fast look-up of atom pins connect to CLB pins - ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, - atom_ctx.nlist, pb_gpin_lookup); + ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup); /* Gets initial cost and loads bounding boxes. 
*/ if (placer_opts.place_algorithm.is_timing_driven()) { if (cube_bb) { - costs.bb_cost = comp_bb_cost(NORMAL); + costs.bb_cost = comp_bb_cost(e_cost_methods::NORMAL); } else { VTR_ASSERT_SAFE(!cube_bb); - costs.bb_cost = comp_layer_bb_cost(NORMAL); + costs.bb_cost = comp_layer_bb_cost(e_cost_methods::NORMAL); } first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ @@ -511,7 +505,7 @@ void try_place(const Netlist<>& net_list, VTR_LOG("\n"); //Update the point-to-point delays from the initial placement - comp_td_connection_delays(place_delay_model.get()); + comp_td_connection_delays(place_delay_model.get(), placer_state); /* * Initialize timing analysis @@ -521,19 +515,15 @@ void try_place(const Netlist<>& net_list, atom_ctx.lookup, p_timing_ctx.connection_delay, is_flat); - placement_delay_calc->set_tsu_margin_relative( - placer_opts.tsu_rel_margin); - placement_delay_calc->set_tsu_margin_absolute( - placer_opts.tsu_abs_margin); + placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin); + placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin); timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); - placer_setup_slacks = std::make_unique( - cluster_ctx.clb_nlist, netlist_pin_lookup); + placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); - placer_criticalities = std::make_unique( - cluster_ctx.clb_nlist, netlist_pin_lookup); + placer_criticalities = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); pin_timing_invalidator = make_net_pin_timing_invalidator( placer_opts.timing_update_type, @@ -549,9 +539,9 @@ void try_place(const Netlist<>& net_list, crit_params.crit_exponent = first_crit_exponent; crit_params.crit_limit = placer_opts.place_crit_limit; - initialize_timing_info(crit_params, place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - pin_timing_invalidator.get(), timing_info.get(), &costs); + initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), pin_timing_invalidator.get(), + timing_info.get(), &costs, placer_state); critical_path = timing_info->least_slack_critical_path(); @@ -562,8 +552,8 @@ void try_place(const Netlist<>& net_list, *timing_ctx.graph, *timing_ctx.constraints, *placement_delay_calc, timing_info->analyzer()); - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode( - analysis_opts.echo_dot_timing_graph_node); + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); + write_setup_timing_graph_dot( getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), @@ -581,10 +571,10 @@ void try_place(const Netlist<>& net_list, /* Total cost is the same as wirelength cost normalized*/ if (cube_bb) { - costs.bb_cost = comp_bb_cost(NORMAL); + costs.bb_cost = comp_bb_cost(e_cost_methods::NORMAL); } else { VTR_ASSERT_SAFE(!cube_bb); - costs.bb_cost = comp_layer_bb_cost(NORMAL); + costs.bb_cost = comp_layer_bb_cost(e_cost_methods::NORMAL); } costs.bb_cost_norm = 1 / costs.bb_cost; @@ -616,7 +606,8 @@ void try_place(const Netlist<>& net_list, place_delay_model.get(), placer_criticalities.get(), placer_opts.place_algorithm, - noc_opts); + noc_opts, + placer_state); //Initial placement statistics VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, @@ -637,8 +628,7 @@ void try_place(const Netlist<>& net_list, VTR_LOG("\n"); VTR_LOG("Initial 
placement estimated setup slack histogram:\n"); - print_histogram( - create_setup_slack_histogram(*timing_info->setup_analyzer())); + print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); } size_t num_macro_members = 0; @@ -662,15 +652,14 @@ void try_place(const Netlist<>& net_list, std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - first_move_lim = get_initial_move_lim(placer_opts, annealing_sched); + int first_move_lim = get_initial_move_lim(placer_opts, annealing_sched); + int inner_recompute_limit; if (placer_opts.inner_loop_recompute_divider != 0) { - inner_recompute_limit = (int)(0.5 - + (float)first_move_lim - / (float)placer_opts.inner_loop_recompute_divider); + inner_recompute_limit = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); } else { /*don't do an inner recompute */ inner_recompute_limit = first_move_lim + 1; @@ -680,9 +669,7 @@ void try_place(const Netlist<>& net_list, * the commandline option quench_recompute_divider */ int quench_recompute_limit; if (placer_opts.quench_recompute_divider != 0) { - quench_recompute_limit = (int)(0.5 - + (float)move_lim - / (float)placer_opts.quench_recompute_divider); + quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); } else { /*don't do an quench recompute */ quench_recompute_limit = first_move_lim + 1; @@ -700,15 +687,11 @@ void try_place(const Netlist<>& net_list, move_type_stat.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); /* Get the first range limiter */ - first_rlim = (float)max(device_ctx.grid.width() - 1, - device_ctx.grid.height() - 1); + float first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); place_move_ctx.first_rlim = first_rlim; - /* Set the temperature low to ensure that initial placement quality will be preserved */ - first_t = EPSILON; - t_annealing_state state(annealing_sched, - first_t, + EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved first_rlim, first_move_lim, first_crit_exponent, @@ -719,7 +702,8 @@ void try_place(const Netlist<>& net_list, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), *move_generator, *manual_move_generator, pin_timing_invalidator.get(), - blocks_affected, placer_opts, noc_opts, move_type_stat); + blocks_affected, placer_opts, noc_opts, move_type_stat, + swap_stats, placer_state); if (!placer_opts.move_stats_file.empty()) { f_move_stats_file = std::unique_ptr( @@ -736,8 +720,9 @@ void try_place(const Netlist<>& net_list, #ifdef ENABLE_ANALYTIC_PLACE // Analytic placer: When enabled, skip most of the annealing and go straight to quench // TODO: refactor goto label. 
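The inner_recompute_limit and quench_recompute_limit expressions above share one idiom worth spelling out: adding 0.5 before the integer cast rounds the quotient to the nearest integer instead of truncating, and a divider of zero is mapped to a limit the loop can never reach. A small sketch of that logic follows; recompute_limit() is a hypothetical helper written for illustration, not a function introduced by this patch.

    // Hypothetical helper mirroring the inner/quench recompute-limit math.
    int recompute_limit(int move_lim, int divider) {
        if (divider == 0) {
            // No periodic recompute requested: return an unreachable limit.
            return move_lim + 1;
        }
        // For the positive values used here, truncating 0.5 + x rounds x
        // to the nearest integer.
        return static_cast<int>(0.5 + static_cast<float>(move_lim) / static_cast<float>(divider));
    }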
- if (placer_opts.enable_analytic_placer) + if (placer_opts.enable_analytic_placer) { skip_anneal = true; + } #endif /* ENABLE_ANALYTIC_PLACE */ //RL agent state definition @@ -748,7 +733,7 @@ void try_place(const Netlist<>& net_list, //Define the timing bb weight factor for the agent's reward function float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT; - if (skip_anneal == false) { + if (!skip_anneal) { //Table header VTR_LOG("\n"); print_place_status_header(noc_opts.noc); @@ -761,7 +746,7 @@ void try_place(const Netlist<>& net_list, state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get()); + timing_info.get(), placer_state); if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); @@ -772,7 +757,8 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_checkpointing && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) { - save_placement_checkpoint_if_needed(placement_checkpoint, + save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), + placement_checkpoint, timing_info, costs, critical_path.delay()); } } @@ -790,7 +776,8 @@ void try_place(const Netlist<>& net_list, *current_move_generator, *manual_move_generator, blocks_affected, timing_info.get(), placer_opts.place_algorithm, move_type_stat, - timing_bb_factor); + timing_bb_factor, + swap_stats, placer_state); //move the update used move_generator to its original variable update_move_generator(move_generator, move_generator2, agent_state, @@ -840,7 +827,7 @@ void try_place(const Netlist<>& net_list, state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get()); + timing_info.get(), placer_state); //move the appropriate move_generator to be the current used move generator assign_current_move_generator(move_generator, move_generator2, @@ -856,7 +843,8 @@ void try_place(const Netlist<>& net_list, *current_move_generator, *manual_move_generator, blocks_affected, timing_info.get(), placer_opts.place_quench_algorithm, move_type_stat, - timing_bb_factor); + timing_bb_factor, + swap_stats, placer_state); //move the update used move_generator to its original variable update_move_generator(move_generator, move_generator2, agent_state, @@ -883,16 +871,17 @@ void try_place(const Netlist<>& net_list, crit_params.crit_limit = placer_opts.place_crit_limit; if (placer_opts.place_algorithm.is_timing_driven()) { - perform_full_timing_update(crit_params, place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - pin_timing_invalidator.get(), timing_info.get(), &costs); + perform_full_timing_update(crit_params, place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), pin_timing_invalidator.get(), + timing_info.get(), &costs, placer_state); VTR_LOG("post-quench CPD = %g (ns) \n", 1e9 * timing_info->least_slack_critical_path().delay()); } //See if our latest checkpoint is better than the current placement solution if (placer_opts.place_checkpointing) - restore_best_placement(placement_checkpoint, timing_info, costs, + restore_best_placement(placer_state, + placement_checkpoint, timing_info, costs, placer_criticalities, placer_setup_slacks, place_delay_model, pin_timing_invalidator, crit_params, noc_opts); @@ -900,7 +889,7 @@ void try_place(const Netlist<>& net_list, std::string filename = 
vtr::string_fmt("placement_%03d_%03d.place", state.num_temps + 1, 0); VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } // TODO: @@ -913,19 +902,20 @@ void try_place(const Netlist<>& net_list, //#endif // Update physical pin values - for (auto block_id : cluster_ctx.clb_nlist.blocks()) { - place_sync_external_block_connections(block_id); + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + blk_loc_registry.place_sync_external_block_connections(block_id); } check_place(costs, place_delay_model.get(), placer_criticalities.get(), placer_opts.place_algorithm, - noc_opts); + noc_opts, + placer_state); //Some stats VTR_LOG("\n"); - VTR_LOG("Swaps called: %d\n", num_ts_called); + VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); report_aborted_moves(); if (placer_opts.place_algorithm.is_timing_driven()) { @@ -948,8 +938,8 @@ void try_place(const Netlist<>& net_list, *timing_info, debug_tnode); } - generate_post_place_timing_reports(placer_opts, analysis_opts, - *timing_info, *placement_delay_calc, is_flat); + generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info, + *placement_delay_calc, is_flat, blk_loc_registry); /* Print critical path delay metrics */ VTR_LOG("\n"); @@ -976,17 +966,17 @@ void try_place(const Netlist<>& net_list, update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); // Print out swap statistics - print_resources_utilization(); + print_resources_utilization(blk_loc_registry); - print_placement_swaps_stats(state); + print_placement_swaps_stats(state, swap_stats); print_placement_move_types_stats(move_type_stat); if (noc_opts.noc) { - write_noc_placement_file(noc_opts.noc_placement_file_name); + write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); } - free_placement_structs(placer_opts, noc_opts); + free_placement_structs(noc_opts); free_try_swap_arrays(); print_timing_stats("Placement Quench", post_quench_timing_stats, @@ -999,6 +989,8 @@ void try_place(const Netlist<>& net_list, p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, p_runtime_ctx.f_update_td_costs_total_elapsed_sec); + + copy_locs_to_global_state(blk_loc_registry); } /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ @@ -1012,7 +1004,8 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info) { + SetupTimingInfo* timing_info, + PlacerState& placer_state) { if (placer_opts.place_algorithm.is_timing_driven()) { /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ @@ -1029,8 +1022,8 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, crit_params.crit_limit = placer_opts.place_crit_limit; //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, - setup_slacks, pin_timing_invalidator, timing_info, costs); + perform_full_timing_update(crit_params, delay_model, criticalities, setup_slacks, + pin_timing_invalidator, timing_info, costs, placer_state); *outer_crit_iter_count = 0; } @@ -1059,33 +1052,32 @@ static void 
placement_inner_loop(const t_annealing_state* state, SetupTimingInfo* timing_info, const t_place_algorithm& place_algorithm, MoveTypeStat& move_type_stat, - float timing_bb_factor) { - int inner_crit_iter_count, inner_iter; - - int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature? + float timing_bb_factor, + t_swap_stats& swap_stats, + PlacerState& placer_state) { + //How many times have we dumped placement to a file this temperature? + int inner_placement_save_count = 0; stats->reset(); - inner_crit_iter_count = 1; - bool manual_move_enabled = false; /* Inner loop begins */ - for (inner_iter = 0; inner_iter < state->move_lim; inner_iter++) { + for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) { e_move_result swap_result = try_swap(state, costs, move_generator, manual_move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, setup_slacks, placer_opts, noc_opts, move_type_stat, place_algorithm, - timing_bb_factor, manual_move_enabled); + timing_bb_factor, manual_move_enabled, swap_stats, placer_state); if (swap_result == ACCEPTED) { /* Move was accepted. Update statistics that are useful for the annealing schedule. */ stats->single_swap_update(*costs); - num_swap_accepted++; + swap_stats.num_swap_accepted++; } else if (swap_result == ABORTED) { - num_swap_aborted++; + swap_stats.num_swap_aborted++; } else { // swap_result == REJECTED - num_swap_rejected++; + swap_stats.num_swap_rejected++; } if (place_algorithm.is_timing_driven()) { @@ -1105,9 +1097,9 @@ static void placement_inner_loop(const t_annealing_state* state, crit_params.crit_limit = placer_opts.place_crit_limit; //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, - criticalities, setup_slacks, pin_timing_invalidator, - timing_info, costs); + perform_full_timing_update(crit_params, delay_model, criticalities, + setup_slacks, pin_timing_invalidator, + timing_info, costs, placer_state); } inner_crit_iter_count++; } @@ -1127,16 +1119,12 @@ static void placement_inner_loop(const t_annealing_state* state, } if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0 - && (inner_iter + 1) - % (state->move_lim - / placer_opts.placement_saves_per_temperature) - == 0) { + && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", state->num_temps + 1, inner_placement_save_count); - VTR_LOG( - "Saving placement to file at temperature move %d / %d: %s\n", - inner_iter, state->move_lim, filename.c_str()); - print_place(nullptr, nullptr, filename.c_str()); + VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", + inner_iter, state->move_lim, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs()); ++inner_placement_save_count; } } @@ -1147,12 +1135,14 @@ static void placement_inner_loop(const t_annealing_state* state, /*only count non-global connections */ static int count_connections() { + auto& cluster_ctx = g_vpr_ctx.clustering(); + int count = 0; - auto& cluster_ctx = g_vpr_ctx.clustering(); - for (auto net_id : cluster_ctx.clb_nlist.nets()) { - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { continue; + } count += cluster_ctx.clb_nlist.net_sinks(net_id).size(); } @@ -1174,7 +1164,9 @@ 
static float starting_t(const t_annealing_state* state, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat) { + MoveTypeStat& move_type_stat, + t_swap_stats& swap_stats, + PlacerState& placer_state) { if (annealing_sched.type == USER_SCHED) { return (annealing_sched.init_t); } @@ -1207,17 +1199,17 @@ static float starting_t(const t_annealing_state* state, manual_move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, setup_slacks, placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm, - REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled); + REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, swap_stats, placer_state); if (swap_result == ACCEPTED) { num_accepted++; av += costs->cost; sum_of_squares += costs->cost * costs->cost; - num_swap_accepted++; + swap_stats.num_swap_accepted++; } else if (swap_result == ABORTED) { - num_swap_aborted++; + swap_stats.num_swap_aborted++; } else { - num_swap_rejected++; + swap_stats.num_swap_rejected++; } } @@ -1276,13 +1268,17 @@ static e_move_result try_swap(const t_annealing_state* state, MoveTypeStat& move_type_stat, const t_place_algorithm& place_algorithm, float timing_bb_factor, - bool manual_move_enabled) { + bool manual_move_enabled, + t_swap_stats& swap_stats, + PlacerState& placer_state) { /* Picks some block and moves it to another spot. If this spot is * * occupied, switch the blocks. Assess the change in cost function. * * rlim is the range limiter. * * Returns whether the swap is accepted, rejected or aborted. * * Passes back the new value of the cost functions. */ + const auto& block_locs = placer_state.block_locs(); + float rlim_escape_fraction = placer_opts.rlim_escape_fraction; float timing_tradeoff = placer_opts.timing_tradeoff; @@ -1293,7 +1289,7 @@ static e_move_result try_swap(const t_annealing_state* state, // move type and block type chosen by the agent t_propose_action proposed_action{e_move_type::UNIFORM, -1}; - num_ts_called++; + swap_stats.num_ts_called++; MoveOutcomeStats move_outcome_stats; @@ -1324,14 +1320,16 @@ static e_move_result try_swap(const t_annealing_state* state, //When manual move toggle button is active, the manual move window asks the user for input. if (manual_move_enabled) { #ifndef NO_GRAPHICS - create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, proposed_action.move_type, rlim, placer_opts, criticalities); + create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, + proposed_action.move_type, rlim, placer_opts, + criticalities); #else //NO_GRAPHICS //Cast to void to explicitly avoid warning. 
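The try_swap() routine above proposes a move, evaluates the cost change delta_c, and reports the outcome as accepted, rejected, or aborted. Assuming the downstream accept/reject decision follows the standard simulated-annealing Metropolis criterion, a minimal self-contained sketch (hypothetical names, not the VTR implementation):

    #include <cmath>
    #include <random>

    enum class e_outcome { ACCEPTED, REJECTED };

    // Improving moves (delta_c <= 0) always pass; worsening moves pass with
    // probability exp(-delta_c / t), which vanishes as t -> 0 (the quench).
    static e_outcome assess_swap_sketch(double delta_c, double t, std::mt19937& rng) {
        if (delta_c <= 0.0) return e_outcome::ACCEPTED;
        if (t <= 0.0) return e_outcome::REJECTED;
        std::uniform_real_distribution<double> uniform(0.0, 1.0);
        return (uniform(rng) < std::exp(-delta_c / t)) ? e_outcome::ACCEPTED : e_outcome::REJECTED;
    }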
(void)manual_move_generator; #endif //NO_GRAPHICS } else if (router_block_move) { // generate a move where two random router blocks are swapped - create_move_outcome = propose_router_swap(blocks_affected, rlim); + create_move_outcome = propose_router_swap(blocks_affected, rlim, placer_state.blk_loc_registry()); proposed_action.move_type = e_move_type::UNIFORM; } else { //Generate a new move (perturbation) used to explore the space of possible placements @@ -1343,7 +1341,9 @@ static e_move_result try_swap(const t_annealing_state* state, } LOG_MOVE_STATS_PROPOSED(t, blocks_affected); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", + costs->cost, costs->bb_cost, costs->timing_cost); e_move_result move_outcome = e_move_result::ABORTED; @@ -1371,7 +1371,7 @@ static e_move_result try_swap(const t_annealing_state* state, */ /* Update the block positions */ - apply_move_blocks(blocks_affected); + apply_move_blocks(blocks_affected, placer_state.mutable_blk_loc_registry()); //Find all the nets affected by this swap and update the wiring costs. //This cost value doesn't depend on the timing info. @@ -1392,7 +1392,7 @@ static e_move_result try_swap(const t_annealing_state* state, /* Update the connection_timing_cost and connection_delay * * values from the temporary values. */ - commit_td_cost(blocks_affected); + commit_td_cost(blocks_affected, placer_state); /* Update timing information. Since we are analyzing setup slacks, * * we only update those values and keep the criticalities stale * @@ -1405,11 +1405,11 @@ static e_move_result try_swap(const t_annealing_state* state, criticalities->disable_update(); setup_slacks->enable_update(); update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator); + setup_slacks, pin_timing_invalidator, placer_state); /* Get the setup slack analysis cost */ //TODO: calculate a weighted average of the slack cost and wiring cost - delta_c = analyze_setup_slack_cost(setup_slacks) * costs->timing_cost_norm; + delta_c = analyze_setup_slack_cost(setup_slacks, placer_state) * costs->timing_cost_norm; } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { /* Take delta_c as a combination of timing and wiring cost. 
In * addition to `timing_tradeoff`, we normalize the cost values */ @@ -1422,8 +1422,7 @@ static e_move_result try_swap(const t_annealing_state* state, timing_delta_c, costs->timing_cost_norm); delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm - + timing_tradeoff * timing_delta_c - * costs->timing_cost_norm; + + timing_tradeoff * timing_delta_c * costs->timing_cost_norm; } else { VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, @@ -1436,7 +1435,7 @@ static e_move_result try_swap(const t_annealing_state* state, NocCostTerms noc_delta_c; // change in NoC cost /* Update the NoC datastructure and costs*/ if (noc_opts.noc) { - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, block_locs); // Include the NoC delta costs in the total cost change for this swap delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts); @@ -1462,7 +1461,7 @@ static e_move_result try_swap(const t_annealing_state* state, //Commit the setup slack information //The timing delay and cost values should be committed already - commit_setup_slacks(setup_slacks); + commit_setup_slacks(setup_slacks, placer_state); } if (place_algorithm == CRITICALITY_TIMING_PLACE) { @@ -1476,14 +1475,14 @@ static e_move_result try_swap(const t_annealing_state* state, /* Update the connection_timing_cost and connection_delay * * values from the temporary values. */ - commit_td_cost(blocks_affected); + commit_td_cost(blocks_affected, placer_state); } /* Update net cost functions and reset flags. */ update_move_nets(); /* Update clb data structures since we kept the move. */ - commit_move_blocks(blocks_affected); + commit_move_blocks(blocks_affected, placer_state.mutable_grid_blocks()); if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat ++move_type_stat.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; @@ -1507,14 +1506,14 @@ static e_move_result try_swap(const t_annealing_state* state, reset_move_nets(); /* Restore the place_ctx.block_locs data structures to their state before the move. */ - revert_move_blocks(blocks_affected); + revert_move_blocks(blocks_affected, placer_state.mutable_blk_loc_registry()); if (place_algorithm == SLACK_TIMING_PLACE) { /* Revert the timing delays and costs to pre-update values. */ /* These routines must be called after reverting the block moves. 
*/ //TODO: make this process incremental - comp_td_connection_delays(delay_model); - comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + comp_td_connection_delays(delay_model, placer_state); + comp_td_costs(delay_model, *criticalities, placer_state, &costs->timing_cost); /* Re-invalidate the affected sink pins since the proposed * * move is rejected, and the same blocks are reverted to * @@ -1524,16 +1523,16 @@ static e_move_result try_swap(const t_annealing_state* state, /* Revert the timing update */ update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator); + setup_slacks, pin_timing_invalidator, placer_state); VTR_ASSERT_SAFE_MSG( - verify_connection_setup_slacks(setup_slacks), + verify_connection_setup_slacks(setup_slacks, placer_state), "The current setup slacks should be identical to the values before the try swap timing info update."); } if (place_algorithm == CRITICALITY_TIMING_PLACE) { /* Unstage the values stored in proposed_* data structures */ - revert_td_cost(blocks_affected); + revert_td_cost(blocks_affected, placer_state.mutable_timing()); } if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat @@ -1541,26 +1540,23 @@ static e_move_result try_swap(const t_annealing_state* state, } /* Revert the traffic flow routes within the NoC*/ if (noc_opts.noc) { - revert_noc_traffic_flow_routes(blocks_affected); + revert_noc_traffic_flow_routes(blocks_affected, block_locs); } } move_outcome_stats.delta_cost_norm = delta_c; - move_outcome_stats.delta_bb_cost_norm = bb_delta_c - * costs->bb_cost_norm; - move_outcome_stats.delta_timing_cost_norm = timing_delta_c - * costs->timing_cost_norm; + move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs->bb_cost_norm; + move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs->timing_cost_norm; move_outcome_stats.delta_bb_cost_abs = bb_delta_c; move_outcome_stats.delta_timing_cost_abs = timing_delta_c; - LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, - (move_outcome ? "ACCEPTED" : "REJECTED"), ""); + LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); } move_outcome_stats.outcome = move_outcome; // If we force a router block move then it was not proposed by the - // move generator so we should not calculate the reward and update + // move generator, so we should not calculate the reward and update // the move generators status since this outcome is not a direct // consequence of the move generator if (!router_block_move) { @@ -1577,13 +1573,14 @@ static e_move_result try_swap(const t_annealing_state* state, /* Clear the data structure containing block move info */ blocks_affected.clear_move_blocks(); - //VTR_ASSERT(check_macro_placement_consistency() == 0); #if 0 // Check that each accepted swap yields a valid placement. This will // greatly slow the placer, but can debug some issues. 
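Throughout these hunks the file-scope swap counters (num_ts_called, num_swap_accepted, num_swap_rejected, num_swap_aborted) are replaced by a t_swap_stats object threaded through try_swap() and its callers. A minimal sketch of that pattern, with assumed field names and types (the real struct lives in VTR):

    #include <cstddef>

    enum class e_result { ACCEPTED, REJECTED, ABORTED };

    // Caller-owned counters instead of globals: tests can assert on them and
    // concurrent annealers can keep one independent instance per thread.
    struct SwapStats {
        std::size_t num_ts_called = 0;
        std::size_t num_swap_accepted = 0;
        std::size_t num_swap_rejected = 0;
        std::size_t num_swap_aborted = 0;
    };

    static void record_swap(SwapStats& stats, e_result result) {
        ++stats.num_ts_called;
        switch (result) {
            case e_result::ACCEPTED: ++stats.num_swap_accepted; break;
            case e_result::REJECTED: ++stats.num_swap_rejected; break;
            case e_result::ABORTED: ++stats.num_swap_aborted; break;
        }
    }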
check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); #endif - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", + costs->cost, costs->bb_cost, costs->timing_cost); return move_outcome; } @@ -1592,7 +1589,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, bool cube_bb; const int number_layers = g_vpr_ctx.device().grid.get_num_layers(); - // If the FPGA has only layer, then we can only use cube bounding box + // If the FPGA has only one layer, then we can only use cube bounding box if (number_layers == 1) { cube_bb = true; } else { @@ -1634,15 +1631,13 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, c /* Update the cost normalization factors */ costs->update_norm_factors(); - // update the noc normalization factors if the palcement includes the NoC + // update the noc normalization factors if the placement includes the NoC if (noc_opts.noc) { update_noc_normalization_factors(*costs); } // update the current total placement cost costs->cost = get_total_cost(costs, placer_opts, noc_opts); - - return; } /** @@ -1692,11 +1687,12 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_ * value suddenly got very good due to the block move, while a good slack value * got very bad, perhaps even worse than the original worse slack value. */ -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& clb_nlist = cluster_ctx.clb_nlist; - const auto& p_timing_ctx = g_placer_ctx.timing(); + const auto& p_timing_ctx = placer_state.timing(); const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack; //Find the original/proposed setup slacks of pins with modified values @@ -1762,11 +1758,12 @@ static e_move_result assess_swap(double delta_c, double t) { * All the connections have already been gathered by blocks_affected.affected_pins * after running the routine find_affected_nets_and_update_costs() in try_swap(). 
*/ -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { +static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerState& placer_state) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& clb_nlist = cluster_ctx.clb_nlist; - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); + auto& p_timing_ctx = placer_state.mutable_timing(); auto& connection_delay = p_timing_ctx.connection_delay; auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; @@ -1787,16 +1784,17 @@ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { //Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on //the move proposed in blocks_affected -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { +static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerTimingContext& p_timing_ctx) { #ifndef VTR_ASSERT_SAFE_ENABLED - static_cast<void>(blocks_affected); + (void)blocks_affected; + (void)p_timing_ctx; #else //Invalidate temp delay & timing cost values to match sanity checks in //comp_td_connection_cost() auto& cluster_ctx = g_vpr_ctx.clustering(); auto& clb_nlist = cluster_ctx.clb_nlist; - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; @@ -1818,10 +1816,9 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { * Invalidate all the timing graph edges associated with these connections via * the NetPinTimingInvalidator class. */ -static void invalidate_affected_connections( - const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + NetPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info) { VTR_ASSERT_SAFE(timing_info); VTR_ASSERT_SAFE(pin_tedges_invalidator); @@ -1837,27 +1834,24 @@ static void alloc_and_load_placement_structs(float place_cost_exp, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, t_direct_inf* directs, - int num_directs) { - int max_pins_per_clb; - unsigned int ipin; - + int num_directs, + PlacerState& placer_state) { const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); - const auto& cube_bb = place_ctx.cube_bb; - - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); + place_ctx.lock_loc_vars(); size_t num_nets = cluster_ctx.clb_nlist.nets().size(); const int num_layers = device_ctx.grid.get_num_layers(); - init_placement_context(); + auto& block_locs = placer_state.mutable_block_locs(); + auto& grid_blocks = placer_state.mutable_grid_blocks(); + init_placement_context(block_locs, grid_blocks); - max_pins_per_clb = 0; - for (const auto& type : device_ctx.physical_tile_types) { + int max_pins_per_clb = 0; + for (const t_physical_tile_type& type : device_ctx.physical_tile_types) { max_pins_per_clb = max(max_pins_per_clb, type.num_pins); } @@ -1865,23 +1859,19 @@ static void alloc_and_load_placement_structs(float place_cost_exp, /* Allocate structures associated with timing driven placement */ /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] 
*/ - p_timing_ctx.connection_delay = make_net_pins_matrix<float>( - (const Netlist<>&)cluster_ctx.clb_nlist, 0.f); - p_timing_ctx.proposed_connection_delay = make_net_pins_matrix<float>( - cluster_ctx.clb_nlist, 0.f); + auto& p_timing_ctx = placer_state.mutable_timing(); + + p_timing_ctx.connection_delay = make_net_pins_matrix<float>((const Netlist<>&)cluster_ctx.clb_nlist, 0.f); + p_timing_ctx.proposed_connection_delay = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, 0.f); - p_timing_ctx.connection_setup_slack = make_net_pins_matrix<float>( - cluster_ctx.clb_nlist, std::numeric_limits<float>::infinity()); + p_timing_ctx.connection_setup_slack = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, std::numeric_limits<float>::infinity()); - p_timing_ctx.connection_timing_cost = PlacerTimingCosts( - cluster_ctx.clb_nlist); - p_timing_ctx.proposed_connection_timing_cost = make_net_pins_matrix< - double>(cluster_ctx.clb_nlist, 0.); + p_timing_ctx.connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); + p_timing_ctx.proposed_connection_timing_cost = make_net_pins_matrix<double>(cluster_ctx.clb_nlist, 0.); p_timing_ctx.net_timing_cost.resize(num_nets, 0.); - for (auto net_id : cluster_ctx.clb_nlist.nets()) { - for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); - ipin++) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { p_timing_ctx.connection_delay[net_id][ipin] = 0; p_timing_ctx.proposed_connection_delay[net_id][ipin] = INVALID_DELAY; @@ -1897,11 +1887,12 @@ static void alloc_and_load_placement_structs(float place_cost_exp, init_place_move_structs(num_nets); - if (cube_bb) { + auto& place_move_ctx = placer_state.mutable_move(); + if (place_ctx.cube_bb) { place_move_ctx.bb_coords.resize(num_nets, t_bb()); place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); } else { - VTR_ASSERT_SAFE(!cube_bb); + VTR_ASSERT_SAFE(!place_ctx.cube_bb); place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb())); place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb())); } @@ -1914,7 +1905,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, alloc_and_load_chan_w_factors_for_place_cost(place_cost_exp); - alloc_and_load_try_swap_structs(cube_bb); + alloc_and_load_try_swap_structs(place_ctx.cube_bb); place_ctx.pl_macros = alloc_and_load_placement_macros(directs, num_directs); @@ -1925,33 +1916,11 @@ static void alloc_and_load_placement_structs(float place_cost_exp, /* Frees the major structures needed by the placer (and not needed * * elsewhere). 
*/ -static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { - auto& place_move_ctx = g_placer_ctx.mutable_move(); - - if (placer_opts.place_algorithm.is_timing_driven()) { - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); - - vtr::release_memory(p_timing_ctx.connection_timing_cost); - vtr::release_memory(p_timing_ctx.connection_delay); - vtr::release_memory(p_timing_ctx.connection_setup_slack); - vtr::release_memory(p_timing_ctx.proposed_connection_timing_cost); - vtr::release_memory(p_timing_ctx.proposed_connection_delay); - vtr::release_memory(p_timing_ctx.net_timing_cost); - } - +static void free_placement_structs(const t_noc_opts& noc_opts) { free_placement_macros_structs(); free_place_move_structs(); - vtr::release_memory(place_move_ctx.bb_coords); - vtr::release_memory(place_move_ctx.bb_num_on_edges); - vtr::release_memory(place_move_ctx.bb_coords); - - vtr::release_memory(place_move_ctx.layer_bb_num_on_edges); - vtr::release_memory(place_move_ctx.layer_bb_coords); - - place_move_ctx.num_sink_pin_layer.clear(); - free_chan_w_factors_for_place_cost(); free_try_swap_structs(); @@ -1985,7 +1954,8 @@ static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts) { + const t_noc_opts& noc_opts, + PlacerState& placer_state) { /* Checks that the placement has not confused our data structures. * * i.e. the clb and block structures agree about the locations of * * every block, blocks are in legal spots, etc. Also recomputes * @@ -1994,16 +1964,15 @@ static void check_place(const t_placer_costs& costs, int error = 0; - error += check_placement_consistency(); - error += check_placement_costs(costs, delay_model, criticalities, - place_algorithm); - error += check_placement_floorplanning(); + error += check_placement_consistency(placer_state.blk_loc_registry()); + error += check_placement_costs(costs, delay_model, criticalities, place_algorithm, placer_state); + error += check_placement_floorplanning(placer_state.block_locs()); if (noc_opts.noc) { // check the NoC costs during placement if the user is using the NoC supported flow - error += check_noc_placement_costs(costs, ERROR_TOL, noc_opts); + error += check_noc_placement_costs(costs, ERROR_TOL, noc_opts, placer_state.block_locs()); // make sure NoC routing configuration does not create any cycles in CDG - error += (int)noc_routing_has_cycle(); + error += (int)noc_routing_has_cycle(placer_state.block_locs()); } if (error == 0) { @@ -2021,18 +1990,19 @@ static void check_place(const t_placer_costs& costs, static int check_placement_costs(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm) { + const t_place_algorithm& place_algorithm, + PlacerState& placer_state) { int error = 0; double bb_cost_check; double timing_cost_check; - const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + const bool cube_bb = g_vpr_ctx.placement().cube_bb; if (cube_bb) { - bb_cost_check = comp_bb_cost(CHECK); + bb_cost_check = comp_bb_cost(e_cost_methods::CHECK); } else { VTR_ASSERT_SAFE(!cube_bb); - bb_cost_check = comp_layer_bb_cost(CHECK); + bb_cost_check = comp_layer_bb_cost(e_cost_methods::CHECK); } if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * ERROR_TOL) { @@ -2043,12 +2013,9 @@ static int check_placement_costs(const t_placer_costs& costs, } if 
(place_algorithm.is_timing_driven()) { - comp_td_costs(delay_model, *criticalities, &timing_cost_check); + comp_td_costs(delay_model, *criticalities, placer_state, &timing_cost_check); //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); - if (fabs( - timing_cost_check - - costs.timing_cost) - > costs.timing_cost * ERROR_TOL) { + if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * ERROR_TOL) { VTR_LOG_ERROR( "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", timing_cost_check, costs.timing_cost); @@ -2058,20 +2025,19 @@ static int check_placement_costs(const t_placer_costs& costs, return error; } -static int check_placement_consistency() { - return check_block_placement_consistency() - + check_macro_placement_consistency(); +static int check_placement_consistency(const BlkLocRegistry& blk_loc_registry) { + return check_block_placement_consistency(blk_loc_registry) + check_macro_placement_consistency(blk_loc_registry); } -static int check_block_placement_consistency() { - int error = 0; - +static int check_block_placement_consistency(const BlkLocRegistry& blk_loc_registry) { auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); + const auto& block_locs = blk_loc_registry.block_locs(); + const auto& grid_blocks = blk_loc_registry.grid_blocks(); + + int error = 0; - vtr::vector<ClusterBlockId, int> bdone( - cluster_ctx.clb_nlist.blocks().size(), 0); + vtr::vector<ClusterBlockId, int> bdone(cluster_ctx.clb_nlist.blocks().size(), 0); /* Step through device grid and placement. Check it against blocks */ for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { @@ -2079,30 +2045,31 @@ static int check_block_placement_consistency() { for (int j = 0; j < (int)device_ctx.grid.height(); j++) { const t_physical_tile_loc tile_loc(i, j, layer_num); const auto& type = device_ctx.grid.get_physical_type(tile_loc); - if (place_ctx.grid_blocks.get_usage(tile_loc) > type->capacity) { + if (grid_blocks.get_usage(tile_loc) > type->capacity) { VTR_LOG_ERROR( "%d blocks were placed at grid location (%d,%d,%d), but location capacity is %d.\n", - place_ctx.grid_blocks.get_usage(tile_loc), i, j, layer_num, - type->capacity); + grid_blocks.get_usage(tile_loc), i, j, layer_num, type->capacity); error++; } int usage_check = 0; for (int k = 0; k < type->capacity; k++) { - auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); - if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) + ClusterBlockId bnum = grid_blocks.block_at_location({i, j, k, layer_num}); + if (bnum == ClusterBlockId::INVALID()) { continue; + } auto logical_block = cluster_ctx.clb_nlist.block_type(bnum); auto physical_tile = type; + t_pl_loc block_loc = block_locs[bnum].loc; - if (physical_tile_type(bnum) != physical_tile) { + if (physical_tile_type(block_loc) != physical_tile) { VTR_LOG_ERROR( "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n", size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name); error++; } - auto& loc = place_ctx.block_locs[bnum].loc; + auto& loc = block_locs[bnum].loc; if (loc.x != i || loc.y != j || loc.layer != layer_num || !is_sub_tile_compatible(physical_tile, logical_block, loc.sub_tile)) { @@ -2121,10 +2088,10 @@ static int check_block_placement_consistency() { ++usage_check; bdone[bnum]++; } - if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) { + if (usage_check != grid_blocks.get_usage(tile_loc)) { VTR_LOG_ERROR( "%d block(s) 
were placed at location (%d,%d,%d), but location contains %d block(s).\n", - place_ctx.grid_blocks.get_usage(tile_loc), + grid_blocks.get_usage(tile_loc), tile_loc.x, tile_loc.y, tile_loc.layer_num, @@ -2136,7 +2103,7 @@ static int check_block_placement_consistency() { } /* Check that every block exists in the device_ctx.grid and cluster_ctx.blocks arrays somewhere. */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) if (bdone[blk_id] != 1) { VTR_LOG_ERROR("Block %zu listed %d times in device context grid.\n", size_t(blk_id), bdone[blk_id]); @@ -2146,26 +2113,25 @@ static int check_block_placement_consistency() { return error; } -int check_macro_placement_consistency() { - int error = 0; - auto& place_ctx = g_vpr_ctx.placement(); +int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_registry) { + const auto& pl_macros = g_vpr_ctx.placement().pl_macros; + const auto& block_locs = blk_loc_registry.block_locs(); + const auto& grid_blocks = blk_loc_registry.grid_blocks(); - auto& pl_macros = place_ctx.pl_macros; + int error = 0; /* Check the pl_macro placement are legal - blocks are in the proper relative position. */ - for (size_t imacro = 0; imacro < place_ctx.pl_macros.size(); imacro++) { + for (size_t imacro = 0; imacro < pl_macros.size(); imacro++) { auto head_iblk = pl_macros[imacro].members[0].blk_index; - for (size_t imember = 0; imember < pl_macros[imacro].members.size(); - imember++) { + for (size_t imember = 0; imember < pl_macros[imacro].members.size(); imember++) { auto member_iblk = pl_macros[imacro].members[imember].blk_index; - // Compute the suppossed member's x,y,z location - t_pl_loc member_pos = place_ctx.block_locs[head_iblk].loc - + pl_macros[imacro].members[imember].offset; + // Compute the supposed member's x,y,z location + t_pl_loc member_pos = block_locs[head_iblk].loc + pl_macros[imacro].members[imember].offset; // Check the place_ctx.block_locs data structure first - if (place_ctx.block_locs[member_iblk].loc != member_pos) { + if (block_locs[member_iblk].loc != member_pos) { VTR_LOG_ERROR( "Block %zu in pl_macro #%zu is not placed in the proper orientation.\n", size_t(member_iblk), imacro); @@ -2173,8 +2139,7 @@ int check_macro_placement_consistency() { } // Then check the place_ctx.grid data structure - if (place_ctx.grid_blocks.block_at_location(member_pos) - != member_iblk) { + if (grid_blocks.block_at_location(member_pos) != member_iblk) { VTR_LOG_ERROR( "Block %zu in pl_macro #%zu is not placed in the proper orientation.\n", size_t(member_iblk), imacro); @@ -2182,6 +2147,7 @@ int check_macro_placement_consistency() { } } // Finish going through all the members } // Finish going through all the macros + return error; } @@ -2212,12 +2178,13 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, const PlacementDelayCalculator& delay_calc, - bool is_flat) { + bool is_flat, + const BlkLocRegistry& blk_loc_registry) { auto& timing_ctx = g_vpr_ctx.timing(); auto& atom_ctx = g_vpr_ctx.atom(); - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, - *timing_ctx.graph, delay_calc, is_flat); + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, + delay_calc, is_flat, blk_loc_registry); resolver.set_detail_level(analysis_opts.timing_report_detail); tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, @@ -2296,22 +2263,20 @@ static 
void print_place_status(const t_annealing_state& state, fflush(stdout); } -static void print_resources_utilization() { - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); +static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& block_locs = blk_loc_registry.block_locs(); int max_block_name = 0; int max_tile_name = 0; //Record the resource requirement std::map<t_logical_block_type_ptr, size_t> num_type_instances; - std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> - num_placed_instances; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto block_loc = place_ctx.block_locs[blk_id]; - auto loc = block_loc.loc; + std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> num_placed_instances; + + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + const t_pl_loc& loc = block_locs[blk_id].loc; auto physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); @@ -2319,42 +2284,39 @@ static void print_resources_utilization() { num_type_instances[logical_block]++; num_placed_instances[logical_block][physical_tile]++; - max_block_name = std::max(max_block_name, - strlen(logical_block->name)); - max_tile_name = std::max(max_tile_name, - strlen(physical_tile->name)); + max_block_name = std::max(max_block_name, strlen(logical_block->name)); + max_tile_name = std::max(max_tile_name, strlen(physical_tile->name)); } VTR_LOG("\n"); VTR_LOG("Placement resource usage:\n"); - for (auto logical_block : num_type_instances) { - for (auto physical_tile : num_placed_instances[logical_block.first]) { + for (const auto [logical_block_type_ptr, _] : num_type_instances) { + for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) { VTR_LOG(" %-*s implemented as %-*s: %d\n", max_block_name, - logical_block.first->name, max_tile_name, - physical_tile.first->name, physical_tile.second); + logical_block_type_ptr->name, max_tile_name, + physical_tile_type_ptr->name, num_instances); } } VTR_LOG("\n"); } -static void print_placement_swaps_stats(const t_annealing_state& state) { - size_t total_swap_attempts = num_swap_rejected + num_swap_accepted - + num_swap_aborted; +static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) { + size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; VTR_ASSERT(total_swap_attempts > 0); size_t num_swap_print_digits = ceil(log10(total_swap_attempts)); - float reject_rate = (float)num_swap_rejected / total_swap_attempts; - float accept_rate = (float)num_swap_accepted / total_swap_attempts; - float abort_rate = (float)num_swap_aborted / total_swap_attempts; + float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; + float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; + float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; VTR_LOG("Placement number of temperatures: %d\n", state.num_temps); VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, total_swap_attempts); VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, - num_swap_accepted, 100 * accept_rate); + swap_stats.num_swap_accepted, 100 * accept_rate); VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, - num_swap_rejected, 100 * reject_rate); + 
swap_stats.num_swap_rejected, 100 * reject_rate); VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, - num_swap_aborted, 100 * abort_rate); + swap_stats.num_swap_aborted, 100 * abort_rate); } static void print_placement_move_types_stats(const MoveTypeStat& move_type_stat) { @@ -2409,28 -2371,25 @@ static void print_placement_move_types_stats(const MoveTypeStat& move_type_stat) VTR_LOG("\n"); } -static void calculate_reward_and_process_outcome( - const t_placer_opts& placer_opts, - const MoveOutcomeStats& move_outcome_stats, - const double& delta_c, - float timing_bb_factor, - MoveGenerator& move_generator) { - std::string reward_fun_string = placer_opts.place_reward_fun; +static void calculate_reward_and_process_outcome(const t_placer_opts& placer_opts, + const MoveOutcomeStats& move_outcome_stats, + double delta_c, + float timing_bb_factor, + MoveGenerator& move_generator) { static std::optional<e_reward_function> reward_fun; if (!reward_fun.has_value()) { - reward_fun = string_to_reward(reward_fun_string); + reward_fun = string_to_reward(placer_opts.place_reward_fun); } - if (reward_fun == BASIC) { + if (reward_fun == e_reward_function::BASIC) { move_generator.process_outcome(-1 * delta_c, reward_fun.value()); - } else if (reward_fun == NON_PENALIZING_BASIC - || reward_fun == RUNTIME_AWARE) { + } else if (reward_fun == e_reward_function::NON_PENALIZING_BASIC || reward_fun == e_reward_function::RUNTIME_AWARE) { if (delta_c < 0) { move_generator.process_outcome(-1 * delta_c, reward_fun.value()); } else { move_generator.process_outcome(0, reward_fun.value()); } - } else if (reward_fun == WL_BIASED_RUNTIME_AWARE) { + } else if (reward_fun == e_reward_function::WL_BIASED_RUNTIME_AWARE) { if (delta_c < 0) { float reward = -1 * (move_outcome_stats.delta_cost_norm @@ -2445,6 +2404,18 @@ static void calculate_reward_and_process_outcome( } } -bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { - return (vpr_setup.PlacerOpts.place_algorithm.is_timing_driven()); +static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { + auto& place_ctx = g_vpr_ctx.mutable_placement(); + + // the placement location variables should be unlocked before being accessed + place_ctx.unlock_loc_vars(); + + // copy the local location variables into the global state + auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry(); + global_blk_loc_registry = blk_loc_registry; + +#ifndef NO_GRAPHICS + // update the graphics' reference to placement location variables + set_graphics_blk_loc_registry_ref(global_blk_loc_registry); +#endif } diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h index 55ff24b7ad6..dba2f79ab23 100644 --- a/vpr/src/place/place.h +++ b/vpr/src/place/place.h @@ -16,6 +16,4 @@ void try_place(const Netlist<>& net_list, int num_directs, bool is_flat); -bool placer_needs_lookahead(const t_vpr_setup& vpr_setup); - #endif diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp index 86af65142c3..ed13382a95f 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -1,33 +1,43 @@ #include "place_checkpoint.h" #include "noc_place_utils.h" +#include "placer_state.h" +#include "grid_block.h" -float t_placement_checkpoint::get_cp_cpd() { return cpd; } -double t_placement_checkpoint::get_cp_bb_cost() { return costs.bb_cost; } -bool t_placement_checkpoint::cp_is_valid() { return valid; } +float t_placement_checkpoint::get_cp_cpd() const { return cpd_; } -void t_placement_checkpoint::save_placement(const 
t_placer_costs& placement_costs, const float& critical_path_delay) { - auto& place_ctx = g_vpr_ctx.placement(); - block_locs = place_ctx.block_locs; - valid = true; - cpd = critical_path_delay; - costs = placement_costs; +double t_placement_checkpoint::get_cp_bb_cost() const { return costs_.bb_cost; } + +bool t_placement_checkpoint::cp_is_valid() const { return valid_; } + +void t_placement_checkpoint::save_placement(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + const t_placer_costs& placement_costs, + const float critical_path_delay) { + block_locs_ = block_locs; + valid_ = true; + cpd_ = critical_path_delay; + costs_ = placement_costs; } -t_placer_costs t_placement_checkpoint::restore_placement() { - auto& mutable_place_ctx = g_vpr_ctx.mutable_placement(); - mutable_place_ctx.block_locs = block_locs; - load_grid_blocks_from_block_locs(); - return costs; +t_placer_costs t_placement_checkpoint::restore_placement(vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + GridBlock& grid_blocks) { + block_locs = block_locs_; + grid_blocks.load_from_block_locs(block_locs); + return costs_; } -void save_placement_checkpoint_if_needed(t_placement_checkpoint& placement_checkpoint, std::shared_ptr<SetupTimingInfo> timing_info, t_placer_costs& costs, float cpd) { - if (placement_checkpoint.cp_is_valid() == false || (timing_info->least_slack_critical_path().delay() < placement_checkpoint.get_cp_cpd() && costs.bb_cost <= placement_checkpoint.get_cp_bb_cost())) { - placement_checkpoint.save_placement(costs, cpd); +void save_placement_checkpoint_if_needed(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + t_placement_checkpoint& placement_checkpoint, + const std::shared_ptr<SetupTimingInfo>& timing_info, + t_placer_costs& costs, + float cpd) { + if (!placement_checkpoint.cp_is_valid() || (timing_info->least_slack_critical_path().delay() < placement_checkpoint.get_cp_cpd() && costs.bb_cost <= placement_checkpoint.get_cp_bb_cost())) { + placement_checkpoint.save_placement(block_locs, costs, cpd); VTR_LOG("Checkpoint saved: bb_costs=%g, TD costs=%g, CPD=%7.3f (ns) \n", costs.bb_cost, costs.timing_cost, 1e9 * cpd); } } -void restore_best_placement(t_placement_checkpoint& placement_checkpoint, +void restore_best_placement(PlacerState& placer_state, + t_placement_checkpoint& placement_checkpoint, std::shared_ptr<SetupTimingInfo>& timing_info, t_placer_costs& costs, std::unique_ptr<PlacerCriticalities>& placer_criticalities, @@ -43,19 +53,21 @@ void restore_best_placement(t_placement_checkpoint& placement_checkpoint, */ if (placement_checkpoint.cp_is_valid() && timing_info->least_slack_critical_path().delay() > placement_checkpoint.get_cp_cpd() && costs.bb_cost * 1.05 > placement_checkpoint.get_cp_bb_cost()) { //restore the latest placement checkpoint - costs = placement_checkpoint.restore_placement(); + + costs = placement_checkpoint.restore_placement(placer_state.mutable_block_locs(), placer_state.mutable_grid_blocks()); //recompute timing from scratch placer_criticalities.get()->set_recompute_required(); placer_setup_slacks.get()->set_recompute_required(); - comp_td_connection_delays(place_delay_model.get()); + comp_td_connection_delays(place_delay_model.get(), placer_state); perform_full_timing_update(crit_params, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get(), - &costs); + &costs, + placer_state); /* If NoC is enabled, re-compute NoC costs and re-initialize NoC internal data structures. 
* If some routers have different locations than the last placement, NoC-related costs and @@ -63,7 +75,7 @@ void restore_best_placement(t_placement_checkpoint& placement_checkpoint, * and need to be re-computed from scratch. */ if (noc_opts.noc) { - reinitialize_noc_routing(costs, {}); + reinitialize_noc_routing(costs, {}, placer_state.block_locs()); } VTR_LOG("\nCheckpoint restored\n"); diff --git a/vpr/src/place/place_checkpoint.h b/vpr/src/place/place_checkpoint.h index d3315aca2f2..cd197beb5bb 100644 --- a/vpr/src/place/place_checkpoint.h +++ b/vpr/src/place/place_checkpoint.h @@ -10,45 +10,69 @@ #include "place_delay_model.h" #include "place_timing_update.h" -//Placement checkpoint + /** * @brief Data structure that stores the placement state and saves it as a checkpoint. * * The placement checkpoints are very useful to solve the problem of critical - * delay oscillations, expecially very late in the annealer. - * - * @param cost The weighted average of the wiring cost and the timing cost. - * @param block_locs saves the location of each block - * @param cpd Saves the critical path delay of the current checkpoint - * @param valid a flag to show whether the current checkpoint is initialized or not + * delay oscillations, especially very late in the annealer. + * @param block_locs_ saves the location of each block + * @param cpd_ Saves the critical path delay of the current checkpoint + * @param valid_ a flag to show whether the current checkpoint is initialized or not + * @param costs_ The weighted average of the wiring cost and the timing cost. */ class t_placement_checkpoint { private: - vtr::vector_map<ClusterBlockId, t_block_loc> block_locs; - float cpd; - bool valid = false; - t_placer_costs costs; + vtr::vector_map<ClusterBlockId, t_block_loc> block_locs_; + float cpd_; + bool valid_ = false; + t_placer_costs costs_; public: - //save the block locations from placement context with the current placement cost and cpd - void save_placement(const t_placer_costs& placement_costs, const float& critical_path_delay); + /** + * @brief Saves the given block locations and their corresponding placement cost and CPD + * @param block_locs The block locations to be saved. + * @param placement_costs Different cost terms associated with the given placement. + * @param critical_path_delay The critical path delay associated with the given placement. + */ + void save_placement(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + const t_placer_costs& placement_costs, + const float critical_path_delay); - //restore the placement solution saved in the checkpoint and update the placement context accordingly - t_placer_costs restore_placement(); + /** + * @brief Restores the placement solution saved in the checkpoint and updates the placement context accordingly + * @param block_locs To be filled with the saved placement. + * @param grid_blocks To be filled with grid location to clustered block mapping of the saved placement. + * @return Different cost terms associated with the saved placement. 
+ */ + t_placer_costs restore_placement(vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + GridBlock& grid_blocks); //return the critical path delay of the saved checkpoint - float get_cp_cpd(); + float get_cp_cpd() const; //return the WL cost of the saved checkpoint - double get_cp_bb_cost(); + double get_cp_bb_cost() const; //return true if the checkpoint is valid - bool cp_is_valid(); + bool cp_is_valid() const; }; -//save placement checkpoint if checkpointing is enabled and checkpoint conditions occured -void save_placement_checkpoint_if_needed(t_placement_checkpoint& placement_checkpoint, std::shared_ptr<SetupTimingInfo> timing_info, t_placer_costs& costs, float cpd); +//save placement checkpoint if checkpointing is enabled and checkpoint conditions occurred +void save_placement_checkpoint_if_needed(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + t_placement_checkpoint& placement_checkpoint, + const std::shared_ptr<SetupTimingInfo>& timing_info, + t_placer_costs& costs, + float cpd); //restore the checkpoint if it's better than the latest placement solution -void restore_best_placement(t_placement_checkpoint& placement_checkpoint, std::shared_ptr<SetupTimingInfo>& timing_info, t_placer_costs& costs, std::unique_ptr<PlacerCriticalities>& placer_criticalities, std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks, std::unique_ptr<PlaceDelayModel>& place_delay_model, std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator, PlaceCritParams crit_params, const t_noc_opts& noc_opts); +void restore_best_placement(PlacerState& placer_state, + t_placement_checkpoint& placement_checkpoint, + std::shared_ptr<SetupTimingInfo>& timing_info, + t_placer_costs& costs, + std::unique_ptr<PlacerCriticalities>& placer_criticalities, + std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks, + std::unique_ptr<PlaceDelayModel>& place_delay_model, + std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator, + PlaceCritParams crit_params, const t_noc_opts& noc_opts); #endif diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index 51da069db38..22b15f5a04f 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -13,13 +13,12 @@ #include "place_util.h" #include "re_cluster_util.h" -int check_placement_floorplanning() { +int check_placement_floorplanning(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs) { int error = 0; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto loc = place_ctx.block_locs[blk_id].loc; + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + t_pl_loc loc = block_locs[blk_id].loc; if (!cluster_floorplanning_legal(blk_id, loc)) { error++; VTR_LOG_ERROR("Block %zu is not in correct floorplanning region.\n", size_t(blk_id)); @@ -162,10 +161,9 @@ void propagate_place_constraints() { auto& place_ctx = g_vpr_ctx.placement(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - for (auto pl_macro : place_ctx.pl_macros) { + for (const t_pl_macro& pl_macro : place_ctx.pl_macros) { if (is_macro_constrained(pl_macro)) { - /* - * Get the PartitionRegion for the head of the macro + /* Get the PartitionRegion for the head of the macro * based on the constraints of all blocks contained in the macro */ PartitionRegion macro_head_pr = update_macro_head_pr(pl_macro); @@ -254,12 +252,11 @@ void load_cluster_constraints() { } } -void mark_fixed_blocks() { +void mark_fixed_blocks(BlkLocRegistry& blk_loc_registry) { auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { 
if (!is_cluster_constrained(blk_id)) { continue; } @@ -273,9 +270,8 @@ void mark_fixed_blocks() { * and mark it as fixed. */ if (is_pr_size_one(pr, block_type, loc)) { - set_block_location(blk_id, loc); - - place_ctx.block_locs[blk_id].is_fixed = true; + blk_loc_registry.set_block_location(blk_id, loc); + blk_loc_registry.mutable_block_locs()[blk_id].is_fixed = true; } } } @@ -475,7 +471,7 @@ int get_part_reg_size(const PartitionRegion& pr, const std::vector<Region>& regions = pr.get_regions(); int num_tiles = 0; - for (const auto& region : regions) { + for (const Region& region : regions) { num_tiles += grid_tiles.region_tile_count(region, block_type); } diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index 493f378b99d..02157e907a1 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -19,10 +19,10 @@ * @brief Check that placement of each block is within the floorplan constraint region * of that block (if the block has any constraints). * + * @param block_locs Contains the location where each clustered block is placed. * @return int The number of errors (inconsistencies in adherence to floorplanning constraints). */ -int check_placement_floorplanning(); - +int check_placement_floorplanning(const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs); /** * @brief Check if the block has floorplanning constraints. @@ -134,10 +134,14 @@ void load_cluster_constraints(); * @brief Marks blocks as fixed if they have a constraint region that * specifies exactly one x, y, subtile location as legal. * - * Marking them as fixed indicates that they cannot be moved + * @param blk_loc_registry Placement block location information. Used to set + * the location of clustered blocks constrained to a single location and mark them + * as fixed. + * + * @note Marking such constrained blocks as fixed indicates that they cannot be moved * during initial placement and simulated annealing. */ -void mark_fixed_blocks(); +void mark_fixed_blocks(BlkLocRegistry& blk_loc_registry); /** * @brief Converts the floorplanning constraints from grid location to diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index 8f9e440d621..ea21d581273 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -11,13 +11,12 @@ #include "rr_graph2.h" #include "timing_place_lookup.h" +#include "placer_state.h" #include "vtr_log.h" #include "vtr_math.h" #include "vpr_error.h" -#include "placer_globals.h" - #ifdef VTR_ENABLE_CAPNPROTO # include "capnp/serialize.h" # include "place_delay_model.capnp.h" @@ -352,9 +351,11 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& * Only estimate delay for signals routed through the inter-block routing network. * TODO: How should we compute the delay for globals? "Global signals are assumed to have zero delay."
*/ -float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, ClusterNetId net_id, int ipin) { +float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, + const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + ClusterNetId net_id, + int ipin) { auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); float delay_source_to_sink = 0.; @@ -368,12 +369,8 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin); int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin); - int source_x = place_ctx.block_locs[source_block].loc.x; - int source_y = place_ctx.block_locs[source_block].loc.y; - int source_layer = place_ctx.block_locs[source_block].loc.layer; - int sink_x = place_ctx.block_locs[sink_block].loc.x; - int sink_y = place_ctx.block_locs[sink_block].loc.y; - int sink_layer = place_ctx.block_locs[sink_block].loc.layer; + t_pl_loc source_block_loc = block_locs[source_block].loc; + t_pl_loc sink_block_loc = block_locs[sink_block].loc; /** * This heuristic only considers delta_x and delta_y, a much better * @@ -382,18 +379,16 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste * In particular this approach does not accurately capture the effect * of fast carry-chain connections. */ - delay_source_to_sink = delay_model->delay({source_x, source_y, source_layer}, - source_block_ipin, - {sink_x, sink_y, sink_layer}, - sink_block_ipin); + delay_source_to_sink = delay_model->delay({source_block_loc.x, source_block_loc.y, source_block_loc.layer}, source_block_ipin, + {sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer}, sink_block_ipin); if (delay_source_to_sink < 0) { VPR_ERROR(VPR_ERROR_PLACE, - "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n" + "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d,%d) to %s (at %d,%d,%d)\n" "in comp_td_single_connection_delay: Delay is less than 0\n", - block_type_pin_index_to_name(physical_tile_type(source_block), source_block_ipin, false).c_str(), - source_x, source_y, - block_type_pin_index_to_name(physical_tile_type(sink_block), sink_block_ipin, false).c_str(), - sink_x, sink_y, + block_type_pin_index_to_name(physical_tile_type(source_block_loc), source_block_ipin, false).c_str(), + source_block_loc.x, source_block_loc.y, source_block_loc.layer, + block_type_pin_index_to_name(physical_tile_type(sink_block_loc), sink_block_ipin, false).c_str(), + sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer, delay_source_to_sink); } } @@ -402,14 +397,16 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste } ///@brief Recompute all point to point delays, updating `connection_delay` matrix.
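// A hedged usage sketch (not part of the patch; the wrapper name is illustrative): with the
// g_placer_ctx global gone, a caller that has just moved blocks recomputes every sink delay
// against its own PlacerState, which now owns both the block locations and the delay matrix.
#include "place_delay_model.h"
#include "placer_state.h"

void recompute_all_connection_delays(const PlaceDelayModel* delay_model, PlacerState& placer_state) {
    // Reads placer_state.block_locs() and rewrites connection_delay in placer_state.mutable_timing().
    comp_td_connection_delays(delay_model, placer_state);
}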
-void comp_td_connection_delays(const PlaceDelayModel* delay_model) { +void comp_td_connection_delays(const PlaceDelayModel* delay_model, + PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); + auto& p_timing_ctx = placer_state.mutable_timing(); + auto& block_locs = placer_state.block_locs(); auto& connection_delay = p_timing_ctx.connection_delay; - for (auto net_id : cluster_ctx.clb_nlist.nets()) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) { - connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, net_id, ipin); + connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, block_locs, net_id, ipin); } } } diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index 681cc136d87..5f61b856405 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -26,6 +26,7 @@ ///@brief Forward declarations. class PlaceDelayModel; +class PlacerState; ///@brief Initialize the placer delay model. std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list, @@ -39,10 +40,14 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& bool is_flat); ///@brief Returns the delay of one point to point connection. -float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, ClusterNetId net_id, int ipin); +float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, + const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + ClusterNetId net_id, + int ipin); ///@brief Recompute all point to point delays, updating `connection_delay` matrix. -void comp_td_connection_delays(const PlaceDelayModel* delay_model); +void comp_td_connection_delays(const PlaceDelayModel* delay_model, + PlacerState& placer_state); ///@brief Abstract interface to a placement delay model.
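// Hedged sketch of how the abstract model is queried; it mirrors the call made in
// comp_td_single_connection_delay() above, and `lookup_delay` is an illustrative name,
// not an existing VPR helper.
float lookup_delay(const PlaceDelayModel* delay_model,
                   const t_pl_loc& src, int src_pin,
                   const t_pl_loc& snk, int snk_pin) {
    // delay() takes the two physical tile locations {x, y, layer} plus the logical pin indices.
    return delay_model->delay({src.x, src.y, src.layer}, src_pin,
                              {snk.x, snk.y, snk.layer}, snk_pin);
}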
class PlaceDelayModel { diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp index 619ea0ad068..8c941bd1d81 100644 --- a/vpr/src/place/place_timing_update.cpp +++ b/vpr/src/place/place_timing_update.cpp @@ -5,16 +5,20 @@ #include "vtr_time.h" -#include "placer_globals.h" #include "place_timing_update.h" +#include "placer_state.h" /* Routines local to place_timing_update.cpp */ static double comp_td_connection_cost(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, + PlacerState& placer_state, ClusterNetId net, int ipin); -static double sum_td_net_cost(ClusterNetId net); -static double sum_td_costs(); + +static double sum_td_net_cost(ClusterNetId net, + PlacerState& placer_state); + +static double sum_td_costs(const PlacerState& placer_state); ///@brief Use an incremental approach to updating timing costs after re-computing criticalities static constexpr bool INCR_COMP_TD_COSTS = true; @@ -31,7 +35,8 @@ void initialize_timing_info(const PlaceCritParams& crit_params, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_costs* costs) { + t_placer_costs* costs, + PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& clb_nlist = cluster_ctx.clb_nlist; @@ -51,13 +56,14 @@ void initialize_timing_info(const PlaceCritParams& crit_params, setup_slacks, pin_timing_invalidator, timing_info, - costs); + costs, + placer_state); //Don't warn again about unconstrained nodes again during placement timing_info->set_warn_unconstrained(false); //Clear all update_td_costs() runtime stat variables - auto& p_runtime_ctx = g_placer_ctx.mutable_runtime(); + auto& p_runtime_ctx = placer_state.mutable_runtime(); p_runtime_ctx.f_update_td_costs_connections_elapsed_sec = 0.f; p_runtime_ctx.f_update_td_costs_nets_elapsed_sec = 0.f; p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec = 0.f; @@ -79,7 +85,8 @@ void perform_full_timing_update(const PlaceCritParams& crit_params, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_costs* costs) { + t_placer_costs* costs, + PlacerState& placer_state) { /* Update all timing related classes. */ criticalities->enable_update(); setup_slacks->enable_update(); @@ -87,15 +94,17 @@ void perform_full_timing_update(const PlaceCritParams& crit_params, timing_info, criticalities, setup_slacks, - pin_timing_invalidator); + pin_timing_invalidator, + placer_state); /* Update the timing cost with new connection criticalities. */ update_timing_cost(delay_model, criticalities, + placer_state, &costs->timing_cost); /* Commit the setup slacks since they are updated. */ - commit_setup_slacks(setup_slacks); + commit_setup_slacks(setup_slacks, placer_state); } /** @@ -127,12 +136,13 @@ void update_timing_classes(const PlaceCritParams& crit_params, SetupTimingInfo* timing_info, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator) { + NetPinTimingInvalidator* pin_timing_invalidator, + PlacerState& placer_state) { /* Run STA to update slacks and adjusted/relaxed criticalities. */ timing_info->update(); /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ - criticalities->update_criticalities(timing_info, crit_params); + criticalities->update_criticalities(timing_info, crit_params, placer_state); /* Update the placer's raw setup slacks. 
*/ setup_slacks->update_setup_slacks(timing_info); @@ -155,11 +165,12 @@ void update_timing_classes(const PlaceCritParams& crit_params, */ void update_timing_cost(const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, + PlacerState& placer_state, double* timing_cost) { #ifdef INCR_COMP_TD_COSTS - update_td_costs(delay_model, *criticalities, timing_cost); + update_td_costs(delay_model, *criticalities, placer_state, timing_cost); #else - comp_td_costs(delay_model, *criticalities, timing_cost); + comp_td_costs(delay_model, *criticalities, placer_state, timing_cost); #endif } @@ -180,9 +191,10 @@ void update_timing_cost(const PlaceDelayModel* delay_model, * rejected, so for efficiency reasons, this routine is not called if the slacks are * rejected in the end. For more detailed info, see the try_swap() routine. */ -void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { +void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks, + PlacerState& placer_state) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - auto& connection_setup_slack = g_placer_ctx.mutable_timing().connection_setup_slack; + auto& connection_setup_slack = placer_state.mutable_timing().connection_setup_slack; /* Incremental: only go through sink pins with modified setup slack */ auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); @@ -205,9 +217,10 @@ void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { * the same as the values in `connection_setup_slack` without running commit_setup_slacks(). * For more detailed info, see the try_swap() routine. */ -bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { +bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - const auto& connection_setup_slack = g_placer_ctx.timing().connection_setup_slack; + const auto& connection_setup_slack = placer_state.timing().connection_setup_slack; /* Go through every single sink pin to check that the slack values are the same */ for (ClusterNetId net_id : clb_nlist.nets()) { @@ -240,13 +253,16 @@ bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { * * See PlacerTimingCosts object used to represent connection_timing_costs for details.
*/ -void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, double* timing_cost) { +void update_td_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& place_crit, + PlacerState& placer_state, + double* timing_cost) { vtr::Timer t; auto& cluster_ctx = g_vpr_ctx.clustering(); auto& clb_nlist = cluster_ctx.clb_nlist; - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); - auto& p_runtime_ctx = g_placer_ctx.mutable_runtime(); + auto& p_timing_ctx = placer_state.mutable_timing(); + auto& p_runtime_ctx = placer_state.mutable_runtime(); auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; //Update the modified pin timing costs @@ -264,7 +280,7 @@ void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticaliti int ipin = clb_nlist.pin_net_index(clb_pin); VTR_ASSERT_SAFE(ipin >= 1 && ipin < int(clb_nlist.net_pins(clb_net).size())); - double new_timing_cost = comp_td_connection_cost(delay_model, place_crit, clb_net, ipin); + double new_timing_cost = comp_td_connection_cost(delay_model, place_crit, placer_state, clb_net, ipin); //Record new value connection_timing_cost[clb_net][ipin] = new_timing_cost; @@ -301,27 +317,30 @@ void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticaliti * * For a more efficient incremental update, see update_td_costs(). */ -void comp_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, double* timing_cost) { +void comp_td_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& place_crit, + PlacerState& placer_state, + double* timing_cost) { auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); + auto& p_timing_ctx = placer_state.mutable_timing(); auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; auto& net_timing_cost = p_timing_ctx.net_timing_cost; - for (auto net_id : cluster_ctx.clb_nlist.nets()) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) continue; for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - float conn_timing_cost = comp_td_connection_cost(delay_model, place_crit, net_id, ipin); + float conn_timing_cost = comp_td_connection_cost(delay_model, place_crit, placer_state, net_id, ipin); /* Record new value */ connection_timing_cost[net_id][ipin] = conn_timing_cost; } /* Store net timing cost for more efficient incremental updating */ - net_timing_cost[net_id] = sum_td_net_cost(net_id); + net_timing_cost[net_id] = sum_td_net_cost(net_id, placer_state); } /* Make sure timing cost does not go above MIN_TIMING_COST. 
*/ - *timing_cost = sum_td_costs(); + *timing_cost = sum_td_costs(placer_state); } /** @@ -332,19 +351,21 @@ void comp_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities */ static double comp_td_connection_cost(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, + PlacerState& placer_state, ClusterNetId net, int ipin) { - const auto& p_timing_ctx = g_placer_ctx.timing(); + const auto& p_timing_ctx = placer_state.timing(); + const auto& block_locs = placer_state.block_locs(); VTR_ASSERT_SAFE_MSG(ipin > 0, "Shouldn't be calculating connection timing cost for driver pins"); - VTR_ASSERT_SAFE_MSG(p_timing_ctx.connection_delay[net][ipin] == comp_td_single_connection_delay(delay_model, net, ipin), + VTR_ASSERT_SAFE_MSG(p_timing_ctx.connection_delay[net][ipin] == comp_td_single_connection_delay(delay_model, block_locs, net, ipin), "Connection delays should already be updated"); double conn_timing_cost = place_crit.criticality(net, ipin) * p_timing_ctx.connection_delay[net][ipin]; VTR_ASSERT_SAFE_MSG(std::isnan(p_timing_ctx.proposed_connection_delay[net][ipin]), - "Propsoed connection delay should already be invalidated"); + "Proposed connection delay should already be invalidated"); VTR_ASSERT_SAFE_MSG(std::isnan(p_timing_ctx.proposed_connection_timing_cost[net][ipin]), "Proposed connection timing cost should already be invalidated"); @@ -353,9 +374,10 @@ static double comp_td_connection_cost(const PlaceDelayModel* delay_model, } ///@brief Returns the timing cost of the specified 'net' based on the values in connection_timing_cost. -static double sum_td_net_cost(ClusterNetId net) { +static double sum_td_net_cost(ClusterNetId net, + PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& p_timing_ctx = g_placer_ctx.mutable_timing(); + auto& p_timing_ctx = placer_state.timing(); auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; double net_td_cost = 0; @@ -366,14 +388,14 @@ static double sum_td_net_cost(ClusterNetId net) { return net_td_cost; } -///@brief Returns the total timing cost accross all nets based on the values in net_timing_cost. -static double sum_td_costs() { +///@brief Returns the total timing cost across all nets based on the values in net_timing_cost. +static double sum_td_costs(const PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& p_timing_ctx = g_placer_ctx.timing(); + const auto& p_timing_ctx = placer_state.timing(); const auto& net_timing_cost = p_timing_ctx.net_timing_cost; double td_cost = 0; - for (auto net_id : cluster_ctx.clb_nlist.nets()) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { continue; } diff --git a/vpr/src/place/place_timing_update.h b/vpr/src/place/place_timing_update.h index 67fca81b3ee..7944c4a7552 100644 --- a/vpr/src/place/place_timing_update.h +++ b/vpr/src/place/place_timing_update.h @@ -16,7 +16,8 @@ void initialize_timing_info(const PlaceCritParams& crit_params, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_costs* costs); + t_placer_costs* costs, + PlacerState& placer_state); ///@brief Updates every timing related classes, variables and structures. 
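// A sketch of the sequence these declarations implement (assumed caller context; it restates
// the body of perform_full_timing_update() shown above, with the local PlacerState threaded
// through each step instead of the deleted g_placer_ctx):
void full_timing_update_sketch(const PlaceCritParams& crit_params,
                               const PlaceDelayModel* delay_model,
                               PlacerCriticalities* criticalities,
                               PlacerSetupSlacks* setup_slacks,
                               NetPinTimingInvalidator* pin_timing_invalidator,
                               SetupTimingInfo* timing_info,
                               t_placer_costs* costs,
                               PlacerState& placer_state) {
    criticalities->enable_update();
    setup_slacks->enable_update();
    // Run STA and refresh criticalities/slacks from placer_state's block positions.
    update_timing_classes(crit_params, timing_info, criticalities, setup_slacks,
                          pin_timing_invalidator, placer_state);
    // Refresh the timing-driven cost term from the new criticalities.
    update_timing_cost(delay_model, criticalities, placer_state, &costs->timing_cost);
    // Persist the updated setup slacks into connection_setup_slack.
    commit_setup_slacks(setup_slacks, placer_state);
}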
void perform_full_timing_update(const PlaceCritParams& crit_params, @@ -25,31 +26,42 @@ void perform_full_timing_update(const PlaceCritParams& crit_params, PlacerSetupSlacks* setup_slacks, NetPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_costs* costs); + t_placer_costs* costs, + PlacerState& placer_state); ///@brief Update timing information based on the current block positions. void update_timing_classes(const PlaceCritParams& crit_params, SetupTimingInfo* timing_info, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator); + NetPinTimingInvalidator* pin_timing_invalidator, + PlacerState& placer_state); ///@brief Updates the timing driven (td) costs. void update_timing_cost(const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, + PlacerState& placer_state, double* timing_cost); ///@brief Incrementally updates timing cost based on the current delays and criticality estimates. -void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, double* timing_cost); +void update_td_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& place_crit, + PlacerState& placer_state, + double* timing_cost); ///@brief Recomputes timing cost from scratch based on the current delays and criticality estimates. -void comp_td_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& place_crit, double* timing_cost); +void comp_td_costs(const PlaceDelayModel* delay_model, + const PlacerCriticalities& place_crit, + PlacerState& placer_state, + double* timing_cost); /** * @brief Commit all the setup slack values from the PlacerSetupSlacks * class to `connection_setup_slack`. */ -void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks); +void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks, + PlacerState& placer_state); ///@brief Verify that the values in `connection_setup_slack` match PlacerSetupSlacks. -bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks); +bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state); diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 52b9fdeb3d1..3541ef01bf1 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -13,20 +13,20 @@ * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`. * * The container at each grid block location should have a length equal to the - * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID. + * subtile capacity of that block. Unused subtiles are marked ClusterBlockId::INVALID().
*/ static GridBlock init_grid_blocks(); -void init_placement_context() { - auto& place_ctx = g_vpr_ctx.mutable_placement(); +void init_placement_context(vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs, + GridBlock& grid_blocks) { auto& cluster_ctx = g_vpr_ctx.clustering(); - /* Intialize the lookup of CLB block positions */ - place_ctx.block_locs.clear(); - place_ctx.block_locs.resize(cluster_ctx.clb_nlist.blocks().size()); + /* Initialize the lookup of CLB block positions */ + block_locs.clear(); + block_locs.resize(cluster_ctx.clb_nlist.blocks().size()); /* Initialize the reverse lookup of CLB block positions */ - place_ctx.grid_blocks = init_grid_blocks(); + grid_blocks = init_grid_blocks(); } static GridBlock init_grid_blocks() { @@ -44,6 +44,7 @@ static GridBlock init_grid_blocks() { } } } + return grid_blocks; } @@ -272,56 +273,9 @@ double get_std_dev(int n, double sum_x_squared, double av_x) { return (std_dev > 0.) ? sqrt(std_dev) : 0.; } -void load_grid_blocks_from_block_locs() { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); - auto& device_ctx = g_vpr_ctx.device(); - - zero_initialize_grid_blocks(); - - auto blocks = cluster_ctx.clb_nlist.blocks(); - for (auto blk_id : blocks) { - t_pl_loc location; - location = place_ctx.block_locs[blk_id].loc; - - VTR_ASSERT(location.x < (int)device_ctx.grid.width()); - VTR_ASSERT(location.y < (int)device_ctx.grid.height()); - - place_ctx.grid_blocks.set_block_at_location(location, blk_id); - place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, - place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); - } -} - -void zero_initialize_grid_blocks() { - auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - /* Initialize all occupancy to zero. */ - - for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { - for (int i = 0; i < (int)device_ctx.grid.width(); i++) { - for (int j = 0; j < (int)device_ctx.grid.height(); j++) { - place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); - auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); - - for (const auto& sub_tile : tile->sub_tiles) { - auto capacity = sub_tile.capacity; - - for (int k = 0; k < capacity.total(); k++) { - if (place_ctx.grid_blocks.block_at_location({i, j, k + capacity.low, layer_num}) != INVALID_BLOCK_ID) { - place_ctx.grid_blocks.set_block_at_location({i, j, k + capacity.low, layer_num}, EMPTY_BLOCK_ID); - } - } - } - } - } - } -} void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos) { auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.placement(); //alloc the legal placement positions int num_tile_types = device_ctx.physical_tile_types.size(); @@ -341,9 +295,6 @@ void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos) -void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - const std::string& block_name = cluster_ctx.clb_nlist.block_name(blk_id); - - //Check if block location is out of range of grid dimensions - if (location.x < 0 || location.x > int(device_ctx.grid.width() - 1) - || location.y < 0 || location.y > int(device_ctx.grid.height() - 1)) { - VPR_THROW(VPR_ERROR_PLACE, "Block %s with ID %d is out of range at location (%d, %d).
\n", block_name.c_str(), blk_id, location.x, location.y); - } - - //Set the location of the block - place_ctx.block_locs[blk_id].loc = location; - - //Check if block is at an illegal location - auto physical_tile = device_ctx.grid.get_physical_type({location.x, location.y, location.layer}); - auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - - if (location.sub_tile >= physical_tile->capacity || location.sub_tile < 0) { - VPR_THROW(VPR_ERROR_PLACE, "Block %s subtile number (%d) is out of range. \n", block_name.c_str(), location.sub_tile); - } - - if (!is_sub_tile_compatible(physical_tile, logical_block, place_ctx.block_locs[blk_id].loc.sub_tile)) { - VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d,%d,%d). \n", - block_name.c_str(), - blk_id, - location.x, - location.y, - location.layer); - } - - //Mark the grid location and usage of the block - place_ctx.grid_blocks.set_block_at_location(location, blk_id); - place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, - place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); - place_sync_external_block_connections(blk_id); -} - -bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_legality) { - auto& device_ctx = g_vpr_ctx.device(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); +bool macro_can_be_placed(const t_pl_macro& pl_macro, + const t_pl_loc& head_pos, + bool check_all_legality, + const BlkLocRegistry& blk_loc_registry) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = blk_loc_registry.grid_blocks(); //Get block type of head member ClusterBlockId blk_id = pl_macro.members[0].blk_index; @@ -460,7 +374,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ // Also check whether the member position is valid, and the member_z is allowed at that location on the grid if (member_pos.x < int(device_ctx.grid.width()) && member_pos.y < int(device_ctx.grid.height()) && is_tile_compatible(device_ctx.grid.get_physical_type({member_pos.x, member_pos.y, member_pos.layer}), block_type) - && place_ctx.grid_blocks.block_at_location(member_pos) == EMPTY_BLOCK_ID) { + && grid_blocks.block_at_location(member_pos) == ClusterBlockId::INVALID()) { // Can still accommodate blocks here, check the next position continue; } else { diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 36c544ef344..934d2072251 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -6,15 +6,20 @@ #ifndef PLACE_UTIL_H #define PLACE_UTIL_H + #include + #include "vpr_types.h" #include "vtr_util.h" #include "vtr_vector_map.h" #include "globals.h" + + // forward declaration of t_placer_costs so that it can be used an argument // in NocCostTerms constructor class t_placer_costs; +class BlkLocRegistry; /** * @brief Data structure that stores different cost terms for NoC placement. @@ -303,7 +308,8 @@ class t_placer_statistics { * * Initialize both of them to empty states. */ -void init_placement_context(); +void init_placement_context(vtr::vector_map& block_locs, + GridBlock& grid_blocks); /** * @brief Get the initial limit for inner loop block move attempt limit. 
@@ -330,26 +336,15 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch */ double get_std_dev(int n, double sum_x_squared, double av_x); -///@brief Initialize usage to 0 and blockID to EMPTY_BLOCK_ID for all place_ctx.grid_block locations -void zero_initialize_grid_blocks(); - -///@brief a utility to calculate grid_blocks given the updated block_locs (used in restore_checkpoint) -void load_grid_blocks_from_block_locs(); - /** * @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement * - * @param legal_pos - * a lookup of all subtiles by sub_tile type - * legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector of all the legal locations - * of the proper tile type and sub_tile type - * + * @param legal_pos a lookup of all subtiles by sub_tile type + * legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector of all the legal locations + * of the proper tile type and sub_tile type */ void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos); -///@brief Performs error checking to see if location is legal for block type, and sets the location and grid usage of the block if it is legal. -void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location); - /// @brief check if a specified location is within the device grid inline bool is_loc_on_chip(t_physical_tile_loc loc) { const auto& grid = g_vpr_ctx.device().grid; @@ -381,6 +376,11 @@ inline bool is_loc_on_chip(t_physical_tile_loc loc) { * Analytic placer does not require to check block's capacity or * floorplanning constraints. However, initial placement or SA-based approach * require to check for all legality constraints. + * @param blk_loc_registry Placement block location information. + * */ -bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_legality); +bool macro_can_be_placed(const t_pl_macro& pl_macro, + const t_pl_loc& head_pos, + bool check_all_legality, + const BlkLocRegistry& blk_loc_registry); #endif diff --git a/vpr/src/place/placer_globals.cpp b/vpr/src/place/placer_globals.cpp deleted file mode 100644 index 56d2efa523c..00000000000 --- a/vpr/src/place/placer_globals.cpp +++ /dev/null @@ -1,8 +0,0 @@ -/** - * @file placer_globals.cpp - * @brief Defines the global variable `g_placer_ctx` in placer_globals.h - */ - -#include "placer_globals.h" - -PlacerContext g_placer_ctx; diff --git a/vpr/src/place/placer_globals.h b/vpr/src/place/placer_globals.h deleted file mode 100644 index 5e927d3b59c..00000000000 --- a/vpr/src/place/placer_globals.h +++ /dev/null @@ -1,10 +0,0 @@ -/** - * @file placer_globals.h - * @brief Declares the accessor variable for key global - * structs that are used everywhere in VPR placer. - */ - -#pragma once -#include "placer_context.h" - -extern PlacerContext g_placer_ctx; diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_state.h similarity index 67% rename from vpr/src/place/placer_context.h rename to vpr/src/place/placer_state.h index 5a7e4c6860f..97941f639b1 100644 --- a/vpr/src/place/placer_context.h +++ b/vpr/src/place/placer_state.h @@ -1,15 +1,17 @@ /** - * @file placer_context.h - * @brief Contains placer context/data structures referenced by various - * source files in vpr/src/place. - * - * All the variables and data structures in this file can be accessed via - * a single global variable: g_placer_ctx. (see placer_globals.h/.cpp).
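// Sketch of the access pattern this rename introduces (assumed usage; the accessor names
// are the ones declared in the new PlacerState class below):
//
//     PlacerState placer_state;  // local to the placement stage, replacing g_placer_ctx
//     auto& block_locs = placer_state.mutable_block_locs();       // block -> location
//     const GridBlock& grid_blocks = placer_state.grid_blocks();  // location -> block
//     auto& p_timing_ctx = placer_state.mutable_timing();         // delays, costs, slacks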
+ * @file placer_state.h + * @brief Contains placer state/data structures referenced by various source files in vpr/src/place. + * A PlacerState object contains the placement state which is subject to change during the placement stage. + * During the placement stage, one or multiple local PlacerState objects are created. At the end of the placement stage, + * one of these objects is copied to the global placement context (PlacementContext). The PlacementContext, + * which is declared in vpr_context.h, contains the placement solution. The PlacementContext should not be used before + * the end of the placement stage. */ #pragma once #include "vpr_context.h" #include "vpr_net_pins_matrix.h" +#include "vpr_types.h" #include "timing_place.h" /** @@ -51,8 +53,6 @@ struct PlacerTimingContext : public Context { /** * @brief Net connection timing costs (i.e. criticality * delay) * of committed block positions. See PlacerTimingCosts. - * - * */ PlacerTimingCosts connection_timing_cost; @@ -106,7 +106,7 @@ struct PlacerMoveContext : public Context { // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer () vtr::Matrix<int> num_sink_pin_layer; - // The first range limit calculated by the anneal + // The first range limit calculated by the annealer float first_rlim; // Scratch vectors that are used by different directed moves for temporary calculations (allocated here to save runtime) @@ -117,7 +117,7 @@ struct PlacerMoveContext : public Context { std::vector<int> Y_coord; std::vector<int> layer_coord; - // Container to save the highly critical pins (higher than a timing criticality limit setted by commandline option) + // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option) std::vector<std::pair<ClusterNetId, int>> highly_crit_pins; }; @@ -134,19 +134,38 @@ struct PlacerMoveContext : public Context { * See the class VprContext in `vpr_context.h` for descriptions on * how to use this class due to similar implementation style.
*/ -class PlacerContext : public Context { +class PlacerState : public Context { public: - const PlacerTimingContext& timing() const { return timing_; } - PlacerTimingContext& mutable_timing() { return timing_; } + inline const PlacerTimingContext& timing() const { return timing_; } + inline PlacerTimingContext& mutable_timing() { return timing_; } + + inline const PlacerRuntimeContext& runtime() const { return runtime_; } + inline PlacerRuntimeContext& mutable_runtime() { return runtime_; } + + inline const PlacerMoveContext& move() const { return move_; } + inline PlacerMoveContext& mutable_move() { return move_; } - const PlacerRuntimeContext& runtime() const { return runtime_; } - PlacerRuntimeContext& mutable_runtime() { return runtime_; } + inline const vtr::vector_map<ClusterBlockId, t_block_loc>& block_locs() const { return blk_loc_registry_.block_locs(); } + inline vtr::vector_map<ClusterBlockId, t_block_loc>& mutable_block_locs() { return blk_loc_registry_.mutable_block_locs(); } - const PlacerMoveContext& move() const { return move_; } - PlacerMoveContext& mutable_move() { return move_; } + inline const GridBlock& grid_blocks() const { return blk_loc_registry_.grid_blocks(); } + inline GridBlock& mutable_grid_blocks() { return blk_loc_registry_.mutable_grid_blocks(); } + + inline const vtr::vector_map<ClusterPinId, int>& physical_pins() const { return blk_loc_registry_.physical_pins(); } + inline vtr::vector_map<ClusterPinId, int>& mutable_physical_pins() { return blk_loc_registry_.mutable_physical_pins(); } + + inline const BlkLocRegistry& blk_loc_registry() const { return blk_loc_registry_; } + inline BlkLocRegistry& mutable_blk_loc_registry() { return blk_loc_registry_; } private: PlacerTimingContext timing_; PlacerRuntimeContext runtime_; PlacerMoveContext move_; + + /** + * @brief Contains: 1) The location where each clustered block is placed. + * 2) Which clustered blocks are located at a given location + * 3) The mapping between the clustered block pins and physical tile pins.
+ */ + BlkLocRegistry blk_loc_registry_; }; diff --git a/vpr/src/place/simpleRL_move_generator.cpp b/vpr/src/place/simpleRL_move_generator.cpp index 45e43e05762..3c1539d244c 100644 --- a/vpr/src/place/simpleRL_move_generator.cpp +++ b/vpr/src/place/simpleRL_move_generator.cpp @@ -112,7 +112,7 @@ std::vector<int> KArmedBanditAgent::get_available_logical_blk_types_() { void KArmedBanditAgent::process_outcome(double reward, e_reward_function reward_fun) { ++num_action_chosen_[last_action_]; - if (reward_fun == RUNTIME_AWARE || reward_fun == WL_BIASED_RUNTIME_AWARE) { + if (reward_fun == e_reward_function::RUNTIME_AWARE || reward_fun == e_reward_function::WL_BIASED_RUNTIME_AWARE) { e_move_type move_type = action_to_move_type_(last_action_); reward /= time_elapsed_[move_type]; } diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h index 179c653f965..25317d52ad6 100644 --- a/vpr/src/place/simpleRL_move_generator.h +++ b/vpr/src/place/simpleRL_move_generator.h @@ -216,7 +216,10 @@ class SimpleRLMoveGenerator : public MoveGenerator { */ template<class T, class = typename std::enable_if<std::is_same<T, EpsilonGreedyAgent>::value || std::is_same<T, SoftmaxAgent>::value>::type> - explicit SimpleRLMoveGenerator(std::unique_ptr<T>& agent, float noc_attraction_weight, size_t high_fanout_thresh); + explicit SimpleRLMoveGenerator(PlacerState& placer_state, + std::unique_ptr<T>& agent, + float noc_attraction_weight, + size_t high_fanout_thresh); // Updates affected_blocks with the proposed move, while respecting the current rlim e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, @@ -230,22 +233,26 @@ class SimpleRLMoveGenerator : public MoveGenerator { }; template<class T, class U> -SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, float noc_attraction_weight, size_t high_fanout_thresh) { +SimpleRLMoveGenerator::SimpleRLMoveGenerator(PlacerState& placer_state, + std::unique_ptr<T>& agent, + float noc_attraction_weight, + size_t high_fanout_thresh) + : MoveGenerator(placer_state) { if (noc_attraction_weight > 0.0f) { all_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES); } else { all_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES - 1); } - all_moves[e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>(); - all_moves[e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>(); - all_moves[e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>(); - all_moves[e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>(); - all_moves[e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>(); - all_moves[e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>(); - all_moves[e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>(); + all_moves[e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>(placer_state); + all_moves[e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>(placer_state); + all_moves[e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>(placer_state); + all_moves[e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>(placer_state); + all_moves[e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>(placer_state); + all_moves[e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>(placer_state); + all_moves[e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>(placer_state); if (noc_attraction_weight > 0.0f) { - all_moves[e_move_type::NOC_ATTRACTION_CENTROID] = std::make_unique<CentroidMoveGenerator>(noc_attraction_weight, high_fanout_thresh); + all_moves[e_move_type::NOC_ATTRACTION_CENTROID] = std::make_unique<CentroidMoveGenerator>(placer_state, noc_attraction_weight, high_fanout_thresh); } karmed_bandit_agent = std::move(agent); diff --git a/vpr/src/place/static_move_generator.cpp b/vpr/src/place/static_move_generator.cpp index b5920f1ffeb..ed54455621d 100644 --- a/vpr/src/place/static_move_generator.cpp +++
b/vpr/src/place/static_move_generator.cpp @@ -12,16 +12,18 @@ #include "vtr_random.h" #include "vtr_assert.h" -StaticMoveGenerator::StaticMoveGenerator(const vtr::vector<e_move_type, float>& move_probs) { +StaticMoveGenerator::StaticMoveGenerator(PlacerState& placer_state, + const vtr::vector<e_move_type, float>& move_probs) + : MoveGenerator(placer_state) { all_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES); - all_moves[e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>(); - all_moves[e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>(); - all_moves[e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>(); - all_moves[e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>(); - all_moves[e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>(); - all_moves[e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>(); - all_moves[e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>(); + all_moves[e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>(placer_state); + all_moves[e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>(placer_state); + all_moves[e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>(placer_state); + all_moves[e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>(placer_state); + all_moves[e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>(placer_state); + all_moves[e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>(placer_state); + all_moves[e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>(placer_state); initialize_move_prob(move_probs); } diff --git a/vpr/src/place/static_move_generator.h b/vpr/src/place/static_move_generator.h index 56b42eea671..1d63e7486a1 100644 --- a/vpr/src/place/static_move_generator.h +++ b/vpr/src/place/static_move_generator.h @@ -17,7 +17,9 @@ class StaticMoveGenerator : public MoveGenerator { void initialize_move_prob(const vtr::vector<e_move_type, float>& move_probs); public: - StaticMoveGenerator(const vtr::vector<e_move_type, float>& move_probs); + StaticMoveGenerator() = delete; + StaticMoveGenerator(PlacerState& placer_state, + const vtr::vector<e_move_type, float>& move_probs); e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 33b991f7dcd..0ab198cb8cc 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -13,10 +13,10 @@ #include "vpr_types.h" #include "vpr_utils.h" #include "globals.h" -#include "placer_globals.h" #include "net_delay.h" #include "timing_place_lookup.h" #include "timing_place.h" +#include "placer_state.h" #include "timing_info.h" @@ -37,7 +37,9 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons * * If the criticality exponent has changed, we also need to update from scratch. */ -void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, const PlaceCritParams& crit_params) { +void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, + const PlaceCritParams& crit_params, + PlacerState& placer_state) { /* If update is not enabled, exit the routine. */ if (!update_enabled) { /* re-computation is required on the next iteration */ @@ -55,7 +57,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf last_crit_exponent_ = crit_params.crit_exponent; } - auto& place_move_ctx = g_placer_ctx.mutable_move(); + auto& place_move_ctx = placer_state.mutable_move(); /* Performs a 1-to-1 mapping from criticality to timing_place_crit_.
* For every pin on every net (or, equivalently, for every tedge ending diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index bd74e52e2db..7ccf73c12f4 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -125,7 +125,9 @@ class PlacerCriticalities { * If out of sync, then the criticalities cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_criticalities(const SetupTimingInfo* timing_info, const PlaceCritParams& crit_params); + void update_criticalities(const SetupTimingInfo* timing_info, + const PlaceCritParams& crit_params, + PlacerState& placer_state); ///@brief Enable the recompute_required flag to enforce from scratch update. void set_recompute_required(); @@ -362,7 +364,7 @@ class PlacerTimingCosts { //Walk through the netlist to determine how many connections there are. size_t iconn = 0; for (ClusterNetId net : nets) { - //The placer always skips 'ignored' nets so they don't effect timing + //The placer always skips 'ignored' nets, so they don't affect timing //costs, so we also skip them here if (nlist.net_is_ignored(net)) { net_start_indicies_[net] = OPEN; @@ -442,7 +444,7 @@ class PlacerTimingCosts { * * Useful for client code operating on the cost values (e.g. difference between costs). */ - operator double() { + operator double() const { return connection_cost_; } @@ -467,6 +469,10 @@ class PlacerTimingCosts { return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); } + const ConnectionProxy operator[](size_t ipin) const { + return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); + } + private: PlacerTimingCosts* timing_costs_; double* net_sink_costs_; @@ -480,6 +486,13 @@ class PlacerTimingCosts { return NetProxy(this, net_connection_costs); } + NetProxy operator[](ClusterNetId net_id) const { + VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); + + const double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; + return NetProxy(const_cast<PlacerTimingCosts*>(this), const_cast<double*>(net_connection_costs)); + } + void clear() { connection_costs_.clear(); net_start_indicies_.clear(); diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 638964e66d7..c16a0d6dbad 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -182,8 +182,6 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& bool is_flat) { vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up"); - init_placement_context(); - t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, @@ -327,14 +325,15 @@ std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ static int get_longest_segment_length(std::vector<t_segment_inf>& segment_inf) { - int length; + int length = 0; - length = 0; - for (size_t i = 0; i < segment_inf.size(); i++) { - if (segment_inf[i].length > length) - length = segment_inf[i].length; + for (const t_segment_inf &seg_info : segment_inf) { + if (seg_info.length > length) { + length = seg_info.length; + } } - return (length); + + return length; } static t_chan_width setup_chan_width(const t_router_opts& router_opts, diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp index 6560c32af24..fc4e74dc56c 100644 --- a/vpr/src/place/uniform_move_generator.cpp +++ b/vpr/src/place/uniform_move_generator.cpp @@ -1,15 +1,31 @@ #include
"uniform_move_generator.h" + #include "globals.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" -e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { +UniformMoveGenerator::UniformMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* /*criticalities*/) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); + //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - false, - nullptr, - nullptr); + /*highly_crit_block=*/false, + /*net_from=*/nullptr, + /*pin_from=*/nullptr, + placer_state); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Uniform Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { //No movable block found @@ -17,16 +33,13 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks return e_create_move::ABORT; } - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; - if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { + if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } @@ -44,7 +57,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks VTR_LOG("\n"); #endif - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { diff --git a/vpr/src/place/uniform_move_generator.h b/vpr/src/place/uniform_move_generator.h index 0ea4a8a9d8d..190a9fb7d31 100644 --- a/vpr/src/place/uniform_move_generator.h +++ b/vpr/src/place/uniform_move_generator.h @@ -9,7 +9,16 @@ * a range limit centered on from_block in the compressed block grid space */ class UniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; + public: + UniformMoveGenerator() = delete; + explicit UniformMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& /*placer_opts*/, + const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp 
b/vpr/src/place/weighted_centroid_move_generator.cpp index 93dd5c796f8..3350c486508 100644 --- a/vpr/src/place/weighted_centroid_move_generator.cpp +++ b/vpr/src/place/weighted_centroid_move_generator.cpp @@ -1,16 +1,34 @@ #include "weighted_centroid_move_generator.h" + #include "globals.h" #include "directed_moves_util.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" -e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +WeightedCentroidMoveGenerator::WeightedCentroidMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); + auto& place_move_ctx = placer_state.mutable_move(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); + //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - false, - nullptr, - nullptr); + /*highly_crit_block=*/false, + /*net_from=*/nullptr, + /*pin_from=*/nullptr, + placer_state); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Weighted Centroid Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { //No movable block found @@ -18,13 +36,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move return e_create_move::ABORT; } - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); - auto& device_ctx = g_vpr_ctx.device(); - - auto& place_move_ctx = g_placer_ctx.mutable_move(); - - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); @@ -36,16 +48,16 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move t_pl_loc to, centroid; /* Calculate the weighted centroid */ - calculate_centroid_loc(b_from, true, centroid, criticalities); + calculate_centroid_loc(b_from, true, centroid, criticalities, blk_loc_registry); // Centroid location is not necessarily a valid location, and the downstream location expect a valid // layer for "to" location. So if the layer is not valid, we set it to the same layer as from loc. centroid.layer = (centroid.layer < 0) ? 
from.layer : centroid.layer; - if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) { + if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { diff --git a/vpr/src/place/weighted_centroid_move_generator.h b/vpr/src/place/weighted_centroid_move_generator.h index 7aea1b6941c..30725bcd3ac 100644 --- a/vpr/src/place/weighted_centroid_move_generator.h +++ b/vpr/src/place/weighted_centroid_move_generator.h @@ -13,7 +13,16 @@ * "Learn to Place: FPGA Placement using Reinforcement Learning and Directed Moves", ICFPT2020 */ class WeightedCentroidMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; + public: + WeightedCentroidMoveGenerator() = delete; + explicit WeightedCentroidMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp index a9e2aaac526..e1581a24123 100644 --- a/vpr/src/place/weighted_median_move_generator.cpp +++ b/vpr/src/place/weighted_median_move_generator.cpp @@ -1,21 +1,37 @@ #include "weighted_median_move_generator.h" + #include "globals.h" -#include -#include "math.h" #include "place_constraints.h" +#include "placer_state.h" #include "move_utils.h" +#include +#include + #define CRIT_MULT_FOR_W_MEDIAN 10 -static void get_bb_cost_for_net_excluding_block(ClusterNetId net_id, ClusterBlockId block_id, ClusterPinId moving_pin_id, const PlacerCriticalities* criticalities, t_bb_cost* coords, bool& skip_net); +WeightedMedianMoveGenerator::WeightedMedianMoveGenerator(PlacerState& placer_state) + : MoveGenerator(placer_state) {} + +e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& placer_state = placer_state_.get(); + const auto& block_locs = placer_state.block_locs(); + auto& place_move_ctx = placer_state.mutable_move(); + const auto& blk_loc_registry = placer_state.blk_loc_registry(); -e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { //Find a movable block based on blk_type ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, - false, - nullptr, - nullptr); + /*highly_crit_block=*/false, + /*net_from=*/nullptr, + /*pin_from=*/nullptr, + placer_state); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Weighted Median Move Choose Block %d - rlim %f\n", size_t(b_from), rlim); if (!b_from) { 
//No movable block found @@ -23,14 +39,12 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& return e_create_move::ABORT; } - auto& place_ctx = g_vpr_ctx.placement(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = g_placer_ctx.mutable_move(); + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); - t_pl_loc from = place_ctx.block_locs[b_from].loc; + t_pl_loc from = block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); @@ -48,9 +62,6 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& place_move_ctx.layer_coord.clear(); std::vector<int> layer_blk_cnt(num_layers, 0); - //true if the net is a feedback from the block to itself (all the net terminals are connected to the same block) - bool skip_net; - //iterate over block pins for (ClusterPinId pin_id : cluster_ctx.clb_nlist.block_pins(b_from)) { ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); @@ -63,12 +74,15 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& * * Note: skip_net returns true if this net should be skipped. Currently, the only case to skip a net is the feedback nets * (a net that all its terminals connected to the same block). Logically, this net should be neglected as it is only connected - * to the moving block. Experimentally, we found that including these nets into calculations badly affect the averall placement + * to the moving block. Experimentally, we found that including these nets into calculations badly affects the overall placement * solution especially for some large designs. + * + * Note: skip_net is true if the net is a feedback from the block to itself (all the net terminals are connected to the same block) */ - get_bb_cost_for_net_excluding_block(net_id, b_from, pin_id, criticalities, &coords, skip_net); - if (skip_net) + bool skip_net = get_bb_cost_for_net_excluding_block(net_id, pin_id, criticalities, &coords); + if (skip_net) { continue; + } // We need to insert the calculated edges in the X,Y vectors multiple times based on the criticality of the pin that caused each of them.
// As all the criticalities are [0,1], we map it to [0,CRIT_MULT_FOR_W_MEDIAN] inserts in the vectors for each edge @@ -124,11 +138,11 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; w_median_point.layer = ((limit_coords.layer_min + limit_coords.layer_max) / 2); - if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) { + if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from, blk_loc_registry)) { return e_create_move::ABORT; } - e_create_move create_move = ::create_move(blocks_affected, b_from, to); + e_create_move create_move = ::create_move(blocks_affected, b_from, to, blk_loc_registry); //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap if (!floorplan_legal(blocks_affected)) { @@ -138,73 +152,59 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& return create_move; } -/** - * This routine finds the bounding box and the cost of each side of the bounding box, - * which is defined as the criticality of the connection that led to the bounding box extending - * that far. If more than one terminal leads to a bounding box edge, w pick the cost using the criticality of the first one. - * This is helpful in computing weighted median moves. - * - * Outputs: - * - coords: the bounding box and the edge costs - * - skip_net: returns whether this net should be skipped in calculation or not - * - * Inputs: - * - net_id: The net we are considering - * - moving_pin_id: pin (which should be on this net) on a block that is being moved. - * - criticalities: the timing criticalities of all connections - */ -static void get_bb_cost_for_net_excluding_block(ClusterNetId net_id, ClusterBlockId, ClusterPinId moving_pin_id, const PlacerCriticalities* criticalities, t_bb_cost* coords, bool& skip_net) { - int pnum, x, y, layer, xmin, xmax, ymin, ymax, layer_min, layer_max; - float xmin_cost, xmax_cost, ymin_cost, ymax_cost, layer_min_cost, layer_max_cost, cost; - - skip_net = true; - - xmin = 0; - xmax = 0; - ymin = 0; - ymax = 0; - layer_min = 0; - layer_max = 0; - - cost = 0.0; - xmin_cost = 0.0; - xmax_cost = 0.0; - ymin_cost = 0.0; - ymax_cost = 0.0; - layer_min_cost = 0.; - layer_max_cost = 0.; +bool WeightedMedianMoveGenerator::get_bb_cost_for_net_excluding_block(ClusterNetId net_id, + ClusterPinId moving_pin_id, + const PlacerCriticalities* criticalities, + t_bb_cost* coords) { + const auto& blk_loc_registry = placer_state_.get().blk_loc_registry(); + const auto& block_locs = blk_loc_registry.block_locs(); + + bool skip_net = true; + + int xmin = 0; + int xmax = 0; + int ymin = 0; + int ymax = 0; + int layer_min = 0; + int layer_max = 0; + + float xmin_cost = 0.f; + float xmax_cost = 0.f; + float ymin_cost = 0.f; + float ymax_cost = 0.f; + float layer_min_cost = 0.f; + float layer_max_cost = 0.f; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.placement(); auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - ClusterBlockId bnum; bool is_first_block = true; - int ipin; - for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) { - bnum = cluster_ctx.clb_nlist.pin_block(pin_id); - layer = place_ctx.block_locs[bnum].loc.layer; + for (ClusterPinId pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) { + ClusterBlockId bnum = cluster_ctx.clb_nlist.pin_block(pin_id); if (pin_id != moving_pin_id) { skip_net = 
false; - pnum = tile_pin_index(pin_id); + int pnum = blk_loc_registry.tile_pin_index(pin_id); /** * Calculates the pin index of the correct pin to calculate the required connection * * if the current pin is the driver, we only care about one sink (the moving pin) * else if the current pin is a sink, calculate the criticality of itself */ + int ipin; if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::DRIVER) { ipin = cluster_ctx.clb_nlist.pin_net_index(moving_pin_id); } else { ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); } - cost = criticalities->criticality(net_id, ipin); + float cost = criticalities->criticality(net_id, ipin); VTR_ASSERT(pnum >= 0); - x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + const t_pl_loc block_loc = block_locs[bnum].loc; + int x = block_loc.x + physical_tile_type(block_loc)->pin_width_offset[pnum]; + int y = block_loc.y + physical_tile_type(block_loc)->pin_height_offset[pnum]; + int layer = block_loc.layer; x = std::max(std::min(x, (int)grid.width() - 2), 1); //-2 for no perim channels y = std::max(std::min(y, (int)grid.height() - 2), 1); //-2 for no perim channels @@ -264,4 +264,7 @@ static void get_bb_cost_for_net_excluding_block(ClusterNetId net_id, ClusterBloc coords->ymax = {ymax, ymax_cost}; coords->layer_min = {layer_min, layer_min_cost}; coords->layer_max = {layer_max, layer_max_cost}; + + return skip_net; } + diff --git a/vpr/src/place/weighted_median_move_generator.h b/vpr/src/place/weighted_median_move_generator.h index c0be89b7c5f..e6a3b11be89 100644 --- a/vpr/src/place/weighted_median_move_generator.h +++ b/vpr/src/place/weighted_median_move_generator.h @@ -13,7 +13,32 @@ * "Learn to Place: FPGA Placement using Reinforcement Learning and Directed Moves", ICFPT2020 */ class WeightedMedianMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; + public: + WeightedMedianMoveGenerator() = delete; + explicit WeightedMedianMoveGenerator(PlacerState& placer_state); + + private: + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, + t_propose_action& proposed_action, + float rlim, + const t_placer_opts& placer_opts, + const PlacerCriticalities* criticalities) override; + + /** + * @brief Finds the bounding box and the cost of each side of the bounding box, + * which is defined as the criticality of the connection that led to the bounding box extending + * that far. If more than one terminal leads to a bounding box edge, we pick the cost using the criticality of the first one. + * + * @param net_id The net we are considering + * @param moving_pin_id pin (which should be on this net) on a block that is being moved.
+ * @param criticalities the timing criticalities of all connections + * @param coords the bounding box and the edge costs to be filled by this method + * @return bool Whether this net should be skipped in calculation or not + */ + bool get_bb_cost_for_net_excluding_block(ClusterNetId net_id, + ClusterPinId moving_pin_id, + const PlacerCriticalities* criticalities, + t_bb_cost* coords); }; #endif diff --git a/vpr/src/power/power.cpp b/vpr/src/power/power.cpp index 94d55479580..847930433c7 100644 --- a/vpr/src/power/power.cpp +++ b/vpr/src/power/power.cpp @@ -626,9 +626,9 @@ static void power_usage_blocks(t_power_usage* power_usage) { t_pb* pb = nullptr; t_power_usage pb_power; - ClusterBlockId iblk = place_ctx.grid_blocks.block_at_location({x, y, z, layer_num}); + ClusterBlockId iblk = place_ctx.grid_blocks().block_at_location({x, y, z, layer_num}); - if (iblk != EMPTY_BLOCK_ID && iblk != INVALID_BLOCK_ID) { + if (iblk) { pb = cluster_ctx.clb_nlist.block_pb(iblk); logical_block = cluster_ctx.clb_nlist.block_type(iblk); } else { @@ -642,7 +642,6 @@ static void power_usage_blocks(t_power_usage* power_usage) { } } } - return; } /** diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index a068210d416..fd239d39b52 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -488,6 +488,7 @@ void recompute_occupancy_from_scratch(const Netlist<>& net_list, bool is_flat) { */ auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& device_ctx = g_vpr_ctx.device(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); /* First set the occupancy of everything to zero. */ for (RRNodeId inode : device_ctx.rr_graph.nodes()) @@ -514,8 +515,9 @@ void recompute_occupancy_from_scratch(const Netlist<>& net_list, bool is_flat) { * (CLB outputs used up by being directly wired to subblocks used only * * locally). */ for (auto blk_id : net_list.blocks()) { - auto cluster_blk_id = convert_to_cluster_block_id(blk_id); - for (int iclass = 0; iclass < (int)physical_tile_type(cluster_blk_id)->class_inf.size(); iclass++) { + ClusterBlockId cluster_blk_id = convert_to_cluster_block_id(blk_id); + t_pl_loc block_loc = block_locs[cluster_blk_id].loc; + for (int iclass = 0; iclass < (int)physical_tile_type(block_loc)->class_inf.size(); iclass++) { int num_local_opins = route_ctx.clb_opins_used_locally[cluster_blk_id][iclass].size(); /* Will always be 0 for pads or SINK classes. */ for (int ipin = 0; ipin < num_local_opins; ipin++) { @@ -533,20 +535,20 @@ static void check_locally_used_clb_opins(const t_clb_opins_used& clb_opins_used_ bool is_flat) { /* Checks that enough OPINs on CLBs have been set aside (used up) to make a * * legal routing if subblocks connect to OPINs directly. 
*/ - - int iclass, num_local_opins, ipin; t_rr_type rr_type; auto& cluster_ctx = g_vpr_ctx.clustering(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + auto& block_locs = g_vpr_ctx.placement().block_locs(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - for (iclass = 0; iclass < (int)physical_tile_type(blk_id)->class_inf.size(); iclass++) { - num_local_opins = clb_opins_used_locally[blk_id][iclass].size(); + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + t_pl_loc block_loc = block_locs[blk_id].loc; + for (int iclass = 0; iclass < (int)physical_tile_type(block_loc)->class_inf.size(); iclass++) { + int num_local_opins = clb_opins_used_locally[blk_id][iclass].size(); /* Always 0 for pads and for SINK classes */ - for (ipin = 0; ipin < num_local_opins; ipin++) { + for (int ipin = 0; ipin < num_local_opins; ipin++) { RRNodeId inode = clb_opins_used_locally[blk_id][iclass][ipin]; check_node_and_range(RRNodeId(inode), route_type, is_flat); /* Node makes sense? */ @@ -561,11 +563,11 @@ static void check_locally_used_clb_opins(const t_clb_opins_used& clb_opins_used_ } ipin = rr_graph.node_pin_num(RRNodeId(inode)); - if (get_class_num_from_pin_physical_num(physical_tile_type(blk_id), ipin) != iclass) { + if (get_class_num_from_pin_physical_num(physical_tile_type(block_loc), ipin) != iclass) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "in check_locally_used_opins: block #%lu (%s):\n" "\tExpected class %d local OPIN has class %d -- rr_node #: %d.\n", - size_t(blk_id), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), iclass, get_class_num_from_pin_physical_num(physical_tile_type(blk_id), ipin), inode); + size_t(blk_id), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), iclass, get_class_num_from_pin_physical_num(physical_tile_type(block_loc), ipin), inode); } } } diff --git a/vpr/src/route/overuse_report.cpp b/vpr/src/route/overuse_report.cpp index 2e07f446314..688388b8689 100644 --- a/vpr/src/route/overuse_report.cpp +++ b/vpr/src/route/overuse_report.cpp @@ -242,7 +242,7 @@ static void report_overused_ipin_opin(std::ostream& os, //Add block type for IPINs/OPINs in overused rr-node report const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - const auto& grid_info = place_ctx.grid_blocks; + const auto& grid_info = place_ctx.grid_blocks(); os << "Grid location: X = " << grid_x << ", Y = " << grid_y << '\n'; os << "Number of blocks currently occupying this grid location = " << grid_info.get_usage({grid_x, grid_y, grid_layer}) << '\n'; @@ -336,7 +336,7 @@ static void report_congested_nets(const Netlist<>& net_list, } else { cluster_block_id = convert_to_cluster_block_id(net_list.pin_block(sink_id)); } - auto cluster_loc = g_vpr_ctx.placement().block_locs[cluster_block_id]; + auto cluster_loc = g_vpr_ctx.placement().block_locs()[cluster_block_id]; auto physical_type = g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num}); int cluster_layer_num = cluster_loc.loc.layer; int cluster_x = cluster_loc.loc.x - g_vpr_ctx.device().grid.get_physical_type({cluster_loc.loc.x, cluster_loc.loc.y, cluster_layer_num})->width; @@ -353,7 +353,7 @@ static void report_congested_nets(const Netlist<>& net_list, os << " " << "Hierarchical Type Name : " << pb_pin->parent_node->hierarchical_type_name() << "\n"; } else { - os << " " << g_vpr_ctx.placement().physical_pins[convert_to_cluster_pin_id(sink_id)] << "\n"; + os << " " << g_vpr_ctx.placement().physical_pins()[convert_to_cluster_pin_id(sink_id)] << "\n"; } } } diff --git a/vpr/src/route/route.cpp 
b/vpr/src/route/route.cpp index d7488910834..1816f610f9f 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -64,7 +64,7 @@ bool route(const Netlist<>& net_list, is_flat); //Initialize drawing, now that we have an RR graph - init_draw_coords(width_fac); + init_draw_coords(width_fac, g_vpr_ctx.placement().blk_loc_registry()); /* Allocate and load additional rr_graph information needed only by the router. */ alloc_and_load_rr_node_route_structs(); diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 74a84472388..b72b78cdaf1 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -337,11 +337,13 @@ static t_clb_opins_used alloc_and_load_clb_opins_used_locally() { int clb_pin, iclass; auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); clb_opins_used_locally.resize(cluster_ctx.clb_nlist.blocks().size()); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto type = physical_tile_type(blk_id); + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + t_pl_loc block_loc = block_locs[blk_id].loc; + auto type = physical_tile_type(block_loc); auto sub_tile = type->sub_tiles[get_sub_tile_index(blk_id)]; auto class_range = get_class_range_for_block(blk_id); @@ -350,12 +352,10 @@ static t_clb_opins_used alloc_and_load_clb_opins_used_locally() { if (is_io_type(type)) continue; - int pin_low = 0; - int pin_high = 0; - get_pin_range_for_block(blk_id, &pin_low, &pin_high); + const auto [pin_low, pin_high] = get_pin_range_for_block(blk_id); for (clb_pin = pin_low; clb_pin <= pin_high; clb_pin++) { - auto net = cluster_ctx.clb_nlist.block_net(blk_id, clb_pin); + ClusterNetId net = cluster_ctx.clb_nlist.block_net(blk_id, clb_pin); if (!net || (net && cluster_ctx.clb_nlist.net_sinks(net).size() == 0)) { //There is no external net connected to this pin @@ -787,10 +787,12 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + auto& block_locs = g_vpr_ctx.placement().block_locs(); if (rip_up_local_opins) { - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - type = physical_tile_type(blk_id); + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + t_pl_loc block_loc = block_locs[blk_id].loc; + type = physical_tile_type(block_loc); for (iclass = 0; iclass < (int)type->class_inf.size(); iclass++) { num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size(); @@ -811,8 +813,9 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f // Make sure heap is empty before we add nodes to the heap. 
heap->empty_heap(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - type = physical_tile_type(blk_id); + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + t_pl_loc block_loc = block_locs[blk_id].loc; + type = physical_tile_type(block_loc); for (iclass = 0; iclass < (int)type->class_inf.size(); iclass++) { num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size(); diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp index 21e0b52bbef..bb89d89fdee 100644 --- a/vpr/src/route/route_utils.cpp +++ b/vpr/src/route/route_utils.cpp @@ -18,13 +18,12 @@ bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay) { constexpr float ERROR_TOL = 0.0001; - unsigned int ipin; auto net_delay_check = make_net_pins_matrix(net_list); load_net_delay_from_routing(net_list, net_delay_check); for (auto net_id : net_list.nets()) { - for (ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) { + for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) { if (net_delay_check[net_id][ipin] == 0.) { /* Should be only GLOBAL nets */ if (fabs(net_delay[net_id][ipin]) > ERROR_TOL) { VPR_ERROR(VPR_ERROR_ROUTE, @@ -220,8 +219,9 @@ void generate_route_timing_reports(const t_router_opts& router_opts, bool is_flat) { auto& timing_ctx = g_vpr_ctx.timing(); auto& atom_ctx = g_vpr_ctx.atom(); + const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry(); - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat); + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat, blk_loc_registry); resolver.set_detail_level(analysis_opts.timing_report_detail); tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 87c3155c752..a5129dc5cf5 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -832,11 +832,11 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, VTR_ASSERT(is_flat); /* This function should be called if placement is done! 
*/ - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; int num_collapsed_nodes = 0; - for (auto cluster_blk_id : cluster_net_list.blocks()) { - auto block_loc = place_ctx.block_locs[cluster_blk_id].loc; + for (ClusterBlockId cluster_blk_id : cluster_net_list.blocks()) { + t_pl_loc block_loc = block_locs[cluster_blk_id].loc; int i = block_loc.x; int j = block_loc.y; int layer = block_loc.layer; @@ -2234,17 +2234,12 @@ static void set_clusters_pin_chains(const ClusteredNetlist& clb_nlist, bool is_flat) { VTR_ASSERT(is_flat); - const auto& place_ctx = g_vpr_ctx.placement(); + const auto& block_locs = g_vpr_ctx.placement().block_locs(); - t_physical_tile_type_ptr physical_type; - t_logical_block_type_ptr logical_block; - const t_sub_tile* sub_tile; - int rel_cap; - - for (auto cluster_blk_id : clb_nlist.blocks()) { - auto block_loc = place_ctx.block_locs[cluster_blk_id].loc; + for (ClusterBlockId cluster_blk_id : clb_nlist.blocks()) { + t_pl_loc block_loc = block_locs[cluster_blk_id].loc; int abs_cap = block_loc.sub_tile; - std::tie(physical_type, sub_tile, rel_cap, logical_block) = get_cluster_blk_physical_spec(cluster_blk_id); + const auto [physical_type, sub_tile, rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_blk_id); auto cluster_pins = get_cluster_block_pins(physical_type, cluster_blk_id, diff --git a/vpr/src/server/pathhelper.cpp b/vpr/src/server/pathhelper.cpp index 4776d7216e5..9e9d8a7b576 100644 --- a/vpr/src/server/pathhelper.cpp +++ b/vpr/src/server/pathhelper.cpp @@ -39,13 +39,13 @@ CritPathsResultPtr calc_critical_path(const std::string& report_type, int crit_p auto& timing_ctx = g_vpr_ctx.timing(); auto& atom_ctx = g_vpr_ctx.atom(); - // + const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry(); t_analysis_opts analysis_opts; analysis_opts.timing_report_detail = details_level; analysis_opts.timing_report_npaths = crit_path_num; - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, *routing_delay_calc, is_flat_routing); + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, *routing_delay_calc, is_flat_routing, blk_loc_registry); resolver.set_detail_level(analysis_opts.timing_report_detail); tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints); diff --git a/vpr/src/timing/PreClusterTimingGraphResolver.cpp b/vpr/src/timing/PreClusterTimingGraphResolver.cpp index c6de97ae868..c94d961c84f 100644 --- a/vpr/src/timing/PreClusterTimingGraphResolver.cpp +++ b/vpr/src/timing/PreClusterTimingGraphResolver.cpp @@ -27,12 +27,13 @@ std::string PreClusterTimingGraphResolver::node_type_name(tatum::NodeId node) co if (detail_level() == e_timing_report_detail::AGGREGATED) { //Annotate primitive grid location, if known auto& atom_ctx = g_vpr_ctx.atom(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); ClusterBlockId cb = atom_ctx.lookup.atom_clb(blk); - if (cb && place_ctx.block_locs.count(cb)) { - int x = place_ctx.block_locs[cb].loc.x; - int y = place_ctx.block_locs[cb].loc.y; - name += " at (" + std::to_string(x) + "," + std::to_string(y) + ")"; + if (cb && block_locs.count(cb)) { + int x = block_locs[cb].loc.x; + int y = block_locs[cb].loc.y; + int layer = block_locs[cb].loc.layer; + name += " at (" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(layer) + ")"; } } diff --git 
a/vpr/src/timing/VprTimingGraphResolver.cpp b/vpr/src/timing/VprTimingGraphResolver.cpp index cdc1124ef6e..fa5dc1ae960 100644 --- a/vpr/src/timing/VprTimingGraphResolver.cpp +++ b/vpr/src/timing/VprTimingGraphResolver.cpp @@ -6,12 +6,14 @@ VprTimingGraphResolver::VprTimingGraphResolver(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const tatum::TimingGraph& timing_graph, const AnalysisDelayCalculator& delay_calc, - bool is_flat) + bool is_flat, + const BlkLocRegistry& blk_loc_registry) : netlist_(netlist) , netlist_lookup_(netlist_lookup) , timing_graph_(timing_graph) , delay_calc_(delay_calc) - , is_flat_(is_flat) {} + , is_flat_(is_flat) + , blk_loc_registry_(blk_loc_registry) {} std::string VprTimingGraphResolver::node_name(tatum::NodeId node) const { AtomPinId pin = netlist_lookup_.tnode_atom_pin(node); @@ -31,12 +33,13 @@ std::string VprTimingGraphResolver::node_type_name(tatum::NodeId node) const { //Detailed report consist of the aggregated reported with a breakdown of inter-block routing //Annotate primitive grid location, if known auto& atom_ctx = g_vpr_ctx.atom(); - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = blk_loc_registry_.block_locs(); ClusterBlockId cb = atom_ctx.lookup.atom_clb(blk); - if (cb && place_ctx.block_locs.count(cb)) { - int x = place_ctx.block_locs[cb].loc.x; - int y = place_ctx.block_locs[cb].loc.y; - name += " at (" + std::to_string(x) + "," + std::to_string(y) + ")"; + if (cb && block_locs.count(cb)) { + int x = block_locs[cb].loc.x; + int y = block_locs[cb].loc.y; + int layer = block_locs[cb].loc.layer; + name += " at (" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(layer) + ")"; } if (detail_level() == e_timing_report_detail::DEBUG) { name += " tnode(" + std::to_string(size_t(node)) + ")"; diff --git a/vpr/src/timing/VprTimingGraphResolver.h b/vpr/src/timing/VprTimingGraphResolver.h index 8faea482d10..7bb9eb3ba6a 100644 --- a/vpr/src/timing/VprTimingGraphResolver.h +++ b/vpr/src/timing/VprTimingGraphResolver.h @@ -6,13 +6,16 @@ #include "atom_lookup.h" #include "AnalysisDelayCalculator.h" +class BlkLocRegistry; + class VprTimingGraphResolver : public tatum::TimingGraphNameResolver { public: VprTimingGraphResolver(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const tatum::TimingGraph& timing_graph, const AnalysisDelayCalculator& delay_calc, - bool is_flat); + bool is_flat, + const BlkLocRegistry& blk_loc_registry); std::string node_name(tatum::NodeId node) const override; std::string node_type_name(tatum::NodeId node) const override; @@ -34,6 +37,8 @@ class VprTimingGraphResolver : public tatum::TimingGraphNameResolver { const AnalysisDelayCalculator& delay_calc_; e_timing_report_detail detail_level_ = e_timing_report_detail::NETLIST; bool is_flat_; + ///@brief contains information about the placement of clustered blocks. + const BlkLocRegistry& blk_loc_registry_; }; #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 6f47cf100cb..f4c1955e84b 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -18,6 +18,8 @@ #include "device_grid.h" #include "user_route_constraints.h" #include "re_cluster_util.h" +#include "placer_state.h" +#include "grid_block.h" /* This module contains subroutines that are used in several unrelated parts * * of VPR. They are VPR-specific utility routines. 
*/ @@ -102,7 +104,7 @@ const t_model_ports* find_model_port(const t_model* model, const std::string& na } if (required) { - VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find port '%s; on architecture modedl '%s'\n", name.c_str(), model->name); + VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find port '%s' on architecture model '%s'\n", name.c_str(), model->name); } return nullptr; @@ -124,18 +126,19 @@ void sync_grid_to_blocks() { auto& device_ctx = g_vpr_ctx.device(); auto& device_grid = device_ctx.grid; - int num_layers = device_ctx.grid.get_num_layers(); + const int num_layers = device_ctx.grid.get_num_layers(); + + auto& grid_blocks = place_ctx.mutable_grid_blocks(); + auto& block_locs = place_ctx.block_locs(); /* Reset usage and allocate blocks list if needed */ - place_ctx.grid_blocks = GridBlock(device_grid.width(), - device_grid.height(), - device_ctx.grid.get_num_layers()); - auto& grid_blocks = place_ctx.grid_blocks; + grid_blocks = GridBlock(device_grid.width(), device_grid.height(), device_ctx.grid.get_num_layers()); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { for (int x = 0; x < (int)device_grid.width(); ++x) { for (int y = 0; y < (int)device_grid.height(); ++y) { - const auto& type = device_ctx.grid.get_physical_type({x, y, layer_num}); + const t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({x, y, layer_num}); grid_blocks.initialized_grid_block_at_location({x, y, layer_num}, type->capacity); } } @@ -143,14 +146,14 @@ void sync_grid_to_blocks() { /* Go through each block */ auto& cluster_ctx = g_vpr_ctx.clustering(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - const auto& blk_loc = place_ctx.block_locs[blk_id].loc; - int blk_x = place_ctx.block_locs[blk_id].loc.x; - int blk_y = place_ctx.block_locs[blk_id].loc.y; - int blk_z = place_ctx.block_locs[blk_id].loc.sub_tile; - int blk_layer = place_ctx.block_locs[blk_id].loc.layer; + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + const auto& blk_loc = block_locs[blk_id].loc; + int blk_x = block_locs[blk_id].loc.x; + int blk_y = block_locs[blk_id].loc.y; + int blk_z = block_locs[blk_id].loc.sub_tile; + int blk_layer = block_locs[blk_id].loc.layer; - auto type = physical_tile_type(blk_id); + auto type = physical_tile_type(blk_loc); /* Check range of block coords */ if (blk_x < 0 || blk_y < 0 @@ -170,8 +173,7 @@ } /* Check already in use */ - if ((EMPTY_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc)) - && (INVALID_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc))) { + if (grid_blocks.block_at_location(blk_loc)) { VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Location (%d, %d, %d, %d) is used more than once.\n", blk_x, blk_y, blk_z, blk_layer); } @@ -184,18 +186,9 @@ /* Set the block */ for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - place_ctx.grid_blocks.set_block_at_location({blk_x + width, - blk_y + height, - blk_z, - blk_layer}, - blk_id); - place_ctx.grid_blocks.set_usage({blk_x + width, - blk_y + height, - blk_layer}, - place_ctx.grid_blocks.get_usage({blk_x + width, - blk_y + height, - blk_layer}) - + 1); + grid_blocks.set_block_at_location({blk_x + width, blk_y + height, blk_z, blk_layer}, blk_id); + grid_blocks.increment_usage({blk_x + width, blk_y + height, blk_layer}); + VTR_ASSERT(device_ctx.grid.get_width_offset({blk_x + width, blk_y + height, blk_layer}) == width); VTR_ASSERT(device_ctx.grid.get_height_offset({blk_x + width, 
blk_y + height, blk_layer}) == height); } @@ -523,39 +516,41 @@ bool is_empty_type(t_logical_block_type_ptr type) { return type == device_ctx.EMPTY_LOGICAL_BLOCK_TYPE; } -t_physical_tile_type_ptr physical_tile_type(ClusterBlockId blk) { - auto& place_ctx = g_vpr_ctx.placement(); +t_physical_tile_type_ptr physical_tile_type(t_pl_loc loc) { auto& device_ctx = g_vpr_ctx.device(); - auto block_loc = place_ctx.block_locs[blk].loc; - - return device_ctx.grid.get_physical_type({block_loc.x, block_loc.y, block_loc.layer}); + return device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); } t_physical_tile_type_ptr physical_tile_type(AtomBlockId atom_blk) { auto& atom_look_up = g_vpr_ctx.atom().lookup; + auto& block_locs = g_vpr_ctx.placement().block_locs(); - auto cluster_blk = atom_look_up.atom_clb(atom_blk); + ClusterBlockId cluster_blk = atom_look_up.atom_clb(atom_blk); VTR_ASSERT(cluster_blk != ClusterBlockId::INVALID()); - return physical_tile_type(cluster_blk); + return physical_tile_type(block_locs[cluster_blk].loc); } t_physical_tile_type_ptr physical_tile_type(ParentBlockId blk_id, bool is_flat) { + auto& block_locs = g_vpr_ctx.placement().block_locs(); + if (is_flat) { return physical_tile_type(convert_to_atom_block_id(blk_id)); } else { - return physical_tile_type(convert_to_cluster_block_id(blk_id)); + ClusterBlockId cluster_blk_id = convert_to_cluster_block_id(blk_id); + t_pl_loc block_loc = block_locs[cluster_blk_id].loc; + return physical_tile_type(block_loc); } } -int get_sub_tile_index(ClusterBlockId blk) { - auto& place_ctx = g_vpr_ctx.placement(); +int get_sub_tile_index(ClusterBlockId blk, + const vtr::vector_map& block_locs) { auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); auto logical_block = cluster_ctx.clb_nlist.block_type(blk); - auto block_loc = place_ctx.block_locs[blk]; + auto block_loc = block_locs[blk]; auto loc = block_loc.loc; int sub_tile_coordinate = loc.sub_tile; @@ -575,6 +570,11 @@ int get_sub_tile_index(ClusterBlockId blk) { VPR_THROW(VPR_ERROR_PLACE, "The Block Id %d has been placed in an impossible sub tile location.\n", blk); } +int get_sub_tile_index(ClusterBlockId blk) { + auto& block_locs = g_vpr_ctx.placement().block_locs(); + return get_sub_tile_index(blk, block_locs); +} + /* Each node in the pb_graph for a top-level pb_type can be uniquely identified * by its pins. 
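 *
 * (A minimal usage sketch of the location-keyed lookups above, assembled from
 *  calls that appear in these hunks:
 *
 *      const auto& block_locs = g_vpr_ctx.placement().block_locs();
 *      t_pl_loc loc = block_locs[blk_id].loc;                     // placed location first
 *      t_physical_tile_type_ptr tile = physical_tile_type(loc);   // then the physical tile
 *      int sub_tile_idx = get_sub_tile_index(blk_id, block_locs); // overload taking block_locs
 *
 *  The old overloads read the global placement context internally; the new
 *  ones take the placement data as an explicit input.)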
Since the pins in a cluster of a certain type are densely indexed, * this function will find the pin index (int pin_count_in_cluster) of the first @@ -606,16 +606,17 @@ int get_unique_pb_graph_node_id(const t_pb_graph_node* pb_graph_node) { t_class_range get_class_range_for_block(const ClusterBlockId blk_id) { /* Assumes that the placement has been done so each block has a set of pins allocated to it */ - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); - auto type = physical_tile_type(blk_id); + t_pl_loc block_loc = block_locs[blk_id].loc; + auto type = physical_tile_type(block_loc); auto sub_tile = type->sub_tiles[get_sub_tile_index(blk_id)]; int sub_tile_capacity = sub_tile.capacity.total(); auto class_range = sub_tile.class_range; int class_range_total = class_range.high - class_range.low + 1; VTR_ASSERT((class_range_total) % sub_tile_capacity == 0); - int rel_capacity = place_ctx.block_locs[blk_id].loc.sub_tile - sub_tile.capacity.low; + int rel_capacity = block_locs[blk_id].loc.sub_tile - sub_tile.capacity.low; t_class_range abs_class_range; abs_class_range.low = rel_capacity * (class_range_total / sub_tile_capacity) + class_range.low; @@ -627,13 +628,9 @@ t_class_range get_class_range_for_block(const AtomBlockId atom_blk) { auto& atom_look_up = g_vpr_ctx.atom().lookup; - auto cluster_blk = atom_look_up.atom_clb(atom_blk); + ClusterBlockId cluster_blk = atom_look_up.atom_clb(atom_blk); - t_physical_tile_type_ptr physical_tile; - const t_sub_tile* sub_tile; - int sub_tile_cap; - t_logical_block_type_ptr logical_block; - std::tie(physical_tile, sub_tile, sub_tile_cap, logical_block) = get_cluster_blk_physical_spec(cluster_blk); + auto [physical_tile, sub_tile, sub_tile_cap, logical_block] = get_cluster_blk_physical_spec(cluster_blk); const t_pb_graph_node* pb_graph_node = atom_look_up.atom_pb_graph_node(atom_blk); VTR_ASSERT(pb_graph_node != nullptr); return get_pb_graph_node_class_physical_range(physical_tile, @@ -651,23 +648,24 @@ t_class_range get_class_range_for_block(const ParentBlockId blk_id, bool is_flat } } -void get_pin_range_for_block(const ClusterBlockId blk_id, - int* pin_low, - int* pin_high) { +std::pair<int, int> get_pin_range_for_block(const ClusterBlockId blk_id) { /* Assumes that the placement has been done so each block has a set of pins allocated to it */ - auto& place_ctx = g_vpr_ctx.placement(); + auto& block_locs = g_vpr_ctx.placement().block_locs(); - auto type = physical_tile_type(blk_id); + t_pl_loc block_loc = block_locs[blk_id].loc; + auto type = physical_tile_type(block_loc); auto sub_tile = type->sub_tiles[get_sub_tile_index(blk_id)]; int sub_tile_capacity = sub_tile.capacity.total(); VTR_ASSERT(sub_tile.num_phy_pins % sub_tile_capacity == 0); - int rel_capacity = place_ctx.block_locs[blk_id].loc.sub_tile - sub_tile.capacity.low; + int rel_capacity = block_loc.sub_tile - sub_tile.capacity.low; int rel_pin_low = rel_capacity * (sub_tile.num_phy_pins / sub_tile_capacity); int rel_pin_high = (rel_capacity + 1) * (sub_tile.num_phy_pins / sub_tile_capacity) - 1; - *pin_low = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_low]; - *pin_high = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_high]; + int pin_low = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_low]; + int pin_high = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_high]; + + return {pin_low, pin_high}; } t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector& 
types) { @@ -692,7 +690,7 @@ t_block_loc get_block_loc(const ParentBlockId& block_id, bool is_flat) { } VTR_ASSERT(cluster_block_id != ClusterBlockId::INVALID()); - auto blk_loc = place_ctx.block_locs[cluster_block_id]; + auto blk_loc = place_ctx.block_locs()[cluster_block_id]; return blk_loc; } @@ -702,7 +700,10 @@ int get_block_num_class(const ParentBlockId& block_id, bool is_flat) { return get_tile_class_max_ptc(type, is_flat); } -int get_block_pin_class_num(const ParentBlockId& block_id, const ParentPinId& pin_id, bool is_flat) { +int get_block_pin_class_num(const ParentBlockId block_id, const ParentPinId pin_id, bool is_flat) { + const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry(); + auto& block_locs = blk_loc_registry.block_locs(); + int class_num; if (is_flat) { @@ -710,9 +711,10 @@ int get_block_pin_class_num(const ParentBlockId& block_id, const ParentPinId& pi class_num = get_atom_pin_class_num(atom_pin_id); } else { ClusterBlockId cluster_block_id = convert_to_cluster_block_id(block_id); + t_pl_loc block_loc = block_locs[cluster_block_id].loc; ClusterPinId cluster_pin_id = convert_to_cluster_pin_id(pin_id); - auto type = physical_tile_type(cluster_block_id); - int phys_pin = tile_pin_index(cluster_pin_id); + auto type = physical_tile_type(block_loc); + int phys_pin = blk_loc_registry.tile_pin_index(cluster_pin_id); class_num = get_class_num_from_pin_physical_num(type, phys_pin); } @@ -1335,7 +1337,7 @@ static void load_pin_id_to_pb_mapping_rec(t_pb* cur_pb, t_pb** pin_id_to_pb_mapp */ void free_pin_id_to_pb_mapping(vtr::vector& pin_id_to_pb_mapping) { auto& cluster_ctx = g_vpr_ctx.clustering(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { delete[] pin_id_to_pb_mapping[blk_id]; } pin_id_to_pb_mapping.clear(); @@ -1343,8 +1345,8 @@ void free_pin_id_to_pb_mapping(vtr::vector& pin_id_to_pb std::tuple get_cluster_blk_physical_spec(ClusterBlockId cluster_blk_id) { auto& grid = g_vpr_ctx.device().grid; - auto& place_ctx = g_vpr_ctx.placement(); - auto& loc = place_ctx.block_locs[cluster_blk_id].loc; + auto& block_locs = g_vpr_ctx.placement().block_locs(); + auto& loc = block_locs[cluster_blk_id].loc; int cap = loc.sub_tile; const auto& physical_type = grid.get_physical_type({loc.x, loc.y, loc.layer}); VTR_ASSERT(grid.get_width_offset({loc.x, loc.y, loc.layer}) == 0 && grid.get_height_offset(t_physical_tile_loc(loc.x, loc.y, loc.layer)) == 0); @@ -1363,12 +1365,7 @@ std::vector get_cluster_internal_class_pairs(const AtomLookup& atom_lookup, ClusterBlockId cluster_block_id) { std::vector class_num_vec; - t_physical_tile_type_ptr physical_tile; - const t_sub_tile* sub_tile; - int rel_cap; - t_logical_block_type_ptr logical_block; - - std::tie(physical_tile, sub_tile, rel_cap, logical_block) = get_cluster_blk_physical_spec(cluster_block_id); + auto [physical_tile, sub_tile, rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_block_id); class_num_vec.reserve(physical_tile->primitive_class_inf.size()); const auto& cluster_atoms = cluster_to_atoms(cluster_block_id); @@ -1392,12 +1389,7 @@ std::vector get_cluster_internal_pins(ClusterBlockId cluster_blk_id) { auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; - t_physical_tile_type_ptr physical_tile; - const t_sub_tile* sub_tile; - int rel_cap; - t_logical_block_type_ptr logical_block; - - std::tie(physical_tile, sub_tile, rel_cap, logical_block) = get_cluster_blk_physical_spec(cluster_blk_id); + auto [physical_tile, sub_tile, 
rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_blk_id); internal_pins.reserve(logical_block->pin_logical_num_to_pb_pin_mapping.size()); std::list internal_pbs; @@ -2061,82 +2053,7 @@ void print_switch_usage() { delete[] inward_switch_inf; } -/* - * Motivation: - * to see what portion of long wires are utilized - * potentially a good measure for router look ahead quality - */ -/* - * void print_usage_by_wire_length() { - * map used_wire_count; - * map total_wire_count; - * auto& device_ctx = g_vpr_ctx.device(); - * for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()){ - * if (rr_graph.node_type(rr_id) == CHANX || rr_graph.node_type(rr_id) == CHANY) { - * //int length = abs(rr_graph.node_xhigh(rr_id) + rr_graph.node_yhigh(rr_id) - * // - rr_graph.node_xlow(rr_id) - rr_graph.node_ylow(rr_id)); - * int length = device_ctx.rr_nodes[(size_t)rr_id].get_length(); - * if (rr_node_route_inf[(size_t)rr_id].occ() > 0) { - * if (used_wire_count.count(length) == 0) - * used_wire_count[length] = 0; - * used_wire_count[length] ++; - * } - * if (total_wire_count.count(length) == 0) - * total_wire_count[length] = 0; - * total_wire_count[length] ++; - * } - * } - * int total_wires = 0; - * map::iterator itr; - * for (itr = total_wire_count.begin(); itr != total_wire_count.end(); itr++) { - * total_wires += itr->second; - * } - * VTR_LOG("\n\t-=-=-=-=-=-=-=-=-=-=- wire usage stats -=-=-=-=-=-=-=-=-=-=-\n"); - * for (itr = total_wire_count.begin(); itr != total_wire_count.end(); itr++) - * VTR_LOG("\ttotal number: wire of length %d, ratio to all length of wires: %g\n", itr->first, ((float)itr->second) / total_wires); - * for (itr = used_wire_count.begin(); itr != used_wire_count.end(); itr++) { - * float ratio_to_same_type_total = ((float)itr->second) / total_wire_count[itr->first]; - * float ratio_to_all_type_total = ((float)itr->second) / total_wires; - * VTR_LOG("\t\tratio to same type of wire: %g\tratio to all types of wire: %g\n", ratio_to_same_type_total, ratio_to_all_type_total); - * } - * VTR_LOG("\n\t-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n\n"); - * used_wire_count.clear(); - * total_wire_count.clear(); - * } - */ -void place_sync_external_block_connections(ClusterBlockId iblk) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - auto physical_tile = physical_tile_type(iblk); - auto logical_block = clb_nlist.block_type(iblk); - - int sub_tile_index = get_sub_tile_index(iblk); - auto sub_tile = physical_tile->sub_tiles[sub_tile_index]; - - VTR_ASSERT(sub_tile.num_phy_pins % sub_tile.capacity.total() == 0); - - int max_num_block_pins = sub_tile.num_phy_pins / sub_tile.capacity.total(); - /* Logical location and physical location is offset by z * max_num_block_pins */ - - int rel_capacity = place_ctx.block_locs[iblk].loc.sub_tile - sub_tile.capacity.low; - - for (auto pin : clb_nlist.block_pins(iblk)) { - int logical_pin_index = clb_nlist.pin_logical_index(pin); - int sub_tile_pin_index = get_sub_tile_physical_pin(sub_tile_index, physical_tile, logical_block, logical_pin_index); - - int new_physical_pin_index = sub_tile.sub_tile_to_tile_pin_indices[sub_tile_pin_index + rel_capacity * max_num_block_pins]; - - auto result = place_ctx.physical_pins.find(pin); - if (result != place_ctx.physical_pins.end()) { - place_ctx.physical_pins[pin] = new_physical_pin_index; - } else { - place_ctx.physical_pins.insert(pin, new_physical_pin_index); - } - } -} int 
max_pins_per_grid_tile() { auto& device_ctx = g_vpr_ctx.device(); @@ -2149,21 +2066,6 @@ int max_pins_per_grid_tile() { return max_pins; } -int net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - // Get the logical pin index of pin within it's logical block type - auto pin_id = cluster_ctx.clb_nlist.net_pin(net_id, net_pin_index); - - return tile_pin_index(pin_id); -} - -int tile_pin_index(const ClusterPinId pin) { - auto& place_ctx = g_vpr_ctx.placement(); - - return place_ctx.physical_pins[pin]; -} - int get_atom_pin_class_num(const AtomPinId atom_pin_id) { auto& atom_look_up = g_vpr_ctx.atom().lookup; auto& atom_net_list = g_vpr_ctx.atom().nlist; @@ -2171,11 +2073,7 @@ int get_atom_pin_class_num(const AtomPinId atom_pin_id) { auto atom_blk_id = atom_net_list.pin_block(atom_pin_id); auto cluster_block_id = atom_look_up.atom_clb(atom_blk_id); - t_physical_tile_type_ptr physical_type; - const t_sub_tile* sub_tile; - int sub_tile_rel_cap; - t_logical_block_type_ptr logical_block; - std::tie(physical_type, sub_tile, sub_tile_rel_cap, logical_block) = get_cluster_blk_physical_spec(cluster_block_id); + auto [physical_type, sub_tile, sub_tile_rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_block_id); auto pb_graph_pin = atom_look_up.atom_pin_pb_graph_pin(atom_pin_id); int pin_physical_num = -1; pin_physical_num = get_pb_pin_physical_num(physical_type, sub_tile, logical_block, sub_tile_rel_cap, pb_graph_pin); @@ -2197,7 +2095,7 @@ t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, const } void pretty_print_uint(const char* prefix, size_t value, int num_digits, int scientific_precision) { - //Print as integer if it will fit in the width, other wise scientific + //Print as integer if it will fit in the width, otherwise scientific if (value <= std::pow(10, num_digits) - 1) { //Direct VTR_LOG("%s%*zu", prefix, num_digits, value); @@ -2376,7 +2274,7 @@ std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, const auto& place_ctx = g_vpr_ctx.placement(); const auto& atom_lookup = g_vpr_ctx.atom().lookup; - const auto& grid_block = place_ctx.grid_blocks; + const auto& grid_block = place_ctx.grid_blocks(); class_num_vec.reserve(physical_type->primitive_class_inf.size()); @@ -2386,7 +2284,7 @@ std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, continue; } auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); - VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() || cluster_blk_id != EMPTY_BLOCK_ID); + VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID()); auto primitive_classes = get_cluster_internal_class_pairs(atom_lookup, cluster_blk_id); @@ -2406,8 +2304,8 @@ std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, t_physical_tile_type_ptr physical_type) { - auto& place_ctx = g_vpr_ctx.placement(); - auto grid_block = place_ctx.grid_blocks; + const auto& place_ctx = g_vpr_ctx.placement(); + const auto& grid_block = place_ctx.grid_blocks(); std::vector pin_num_vec; pin_num_vec.reserve(get_tile_num_internal_pin(physical_type)); @@ -2419,7 +2317,7 @@ std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, continue; } auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); - VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() && cluster_blk_id != EMPTY_BLOCK_ID); + VTR_ASSERT(cluster_blk_id != 
ClusterBlockId::INVALID()); cluster_internal_pins = get_cluster_internal_pins(cluster_blk_id); const auto& cluster_pin_chains = pin_chains_num[cluster_blk_id]; diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 9382660142c..24da4489b6b 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -5,17 +5,18 @@ #include #include "vpr_types.h" +#include "vtr_vector.h" + #include "atom_netlist.h" #include "clustered_netlist.h" #include "netlist.h" -#include "vtr_vector.h" - #include "arch_util.h" #include "physical_types_util.h" #include "rr_graph_utils.h" #include "vpr_constraints.h" class DeviceGrid; +class PlacerState; const t_model* find_model(const t_model* models, const std::string& name, bool required = true); const t_model_ports* find_model_port(const t_model* model, const std::string& name, bool required = true); @@ -27,14 +28,21 @@ bool is_clb_external_pin(ClusterBlockId blk_id, int pb_pin_id); bool is_empty_type(t_physical_tile_type_ptr type); bool is_empty_type(t_logical_block_type_ptr type); -//Returns the corresponding physical type given the logical type as parameter -t_physical_tile_type_ptr physical_tile_type(ClusterBlockId blk); +/** + * @brief Returns the corresponding physical type given the location in the grid. + * @param loc The block location in the grid. + * @return The physical tile type of the given location. + */ +t_physical_tile_type_ptr physical_tile_type(t_pl_loc loc); t_physical_tile_type_ptr physical_tile_type(AtomBlockId atom_blk); t_physical_tile_type_ptr physical_tile_type(ParentBlockId blk_id, bool is_flat); //Returns the sub tile corresponding to the logical block location within a physical type +int get_sub_tile_index(ClusterBlockId blk, + const vtr::vector_map& block_locs); + int get_sub_tile_index(ClusterBlockId blk); int get_unique_pb_graph_node_id(const t_pb_graph_node* pb_graph_node); @@ -47,17 +55,21 @@ t_class_range get_class_range_for_block(const AtomBlockId atom_blk); t_class_range get_class_range_for_block(const ParentBlockId blk_id, bool is_flat); -//Returns the physical pin range relative to a block id. This must be called after placement -//as the block id is used to retrieve the information of the used physical tile. -void get_pin_range_for_block(const ClusterBlockId blk_id, - int* pin_low, - int* pin_high); +// +/** + * @brief Returns the physical pin range relative to a block id. This must be called after placement + * as the block id is used to retrieve the information of the used physical tile. + * + * @param blk_id The unique ID of a clustered block whose pin range is desired. 
+ * @return std::pair<int, int> --> (pin_low, pin_high) + */ +std::pair<int, int> get_pin_range_for_block(const ClusterBlockId blk_id); t_block_loc get_block_loc(const ParentBlockId& block_id, bool is_flat); int get_block_num_class(const ParentBlockId& block_id, bool is_flat); -int get_block_pin_class_num(const ParentBlockId& block_id, const ParentPinId& pin_id, bool is_flat); +int get_block_pin_class_num(const ParentBlockId block_id, const ParentPinId pin_id, bool is_flat); template<typename T> inline ClusterNetId convert_to_cluster_net_id(T id) { @@ -213,33 +225,6 @@ void print_usage_by_wire_length(); AtomBlockId find_memory_sibling(const t_pb* pb); -/** - * @brief Syncs the logical block pins corresponding to the input iblk with the corresponding chosen physical tile - * @param iblk cluster block ID to sync within the assigned physical tile - * - * This routine updates the physical pins vector of the place context after the placement step - * to syncronize the pins related to the logical block with the actual connection interface of - * the belonging physical tile with the RR graph. - * - * This step is required as the logical block can be placed at any compatible sub tile locations - * within a physical tile. - * Given that it is possible to have equivalent logical blocks within a specific sub tile, with - * a different subset of IO pins, the various pins offsets must be correctly computed and assigned - * to the physical pins vector, so that, when the net RR terminals are computed, the correct physical - * tile IO pins are selected. - * - * This routine uses the x,y and sub_tile coordinates of the clb netlist, and expects those to place each netlist block - * at a legal location that can accomodate it. - * It does not check for overuse of locations, therefore it can be used with placements that have resource overuse. 
- */ -void place_sync_external_block_connections(ClusterBlockId iblk); - -//Returns the physical pin of the tile, related to the given ClusterNedId, and the net pin index -int net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index); - -//Returns the physical pin of the tile, related to the given ClusterPinId -int tile_pin_index(const ClusterPinId pin); - int get_atom_pin_class_num(const AtomPinId atom_pin_id); int max_pins_per_grid_tile(); diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp index 4633996458d..bcbba60a45e 100644 --- a/vpr/test/test_noc_place_utils.cpp +++ b/vpr/test/test_noc_place_utils.cpp @@ -29,11 +29,12 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // get global datastructures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); + auto& block_locs = place_ctx.mutable_block_locs(); // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - place_ctx.block_locs.clear(); + block_locs.clear(); // store the reference to device grid with // the grid width will be the size of the noc mesh @@ -112,7 +113,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures - place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); + block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } // similar parameters for all traffic flows @@ -194,7 +195,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { } // now call the test function - initial_noc_routing({}); + initial_noc_routing({}, block_locs); // now verify the function by comparing the link bandwidths in the noc model (should have been updated by the test function) to the golden set int number_of_links = golden_link_bandwidths.size(); @@ -226,11 +227,12 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // get global datastructures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); + auto& block_locs = place_ctx.mutable_block_locs(); // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - place_ctx.block_locs.clear(); + block_locs.clear(); // store the reference to device grid with // the grid width will be the size of the noc mesh @@ -309,7 +311,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures - place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); + block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } noc_ctx.noc_model.set_noc_link_latency(1); @@ -389,7 +391,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // assume this works // this is needed to set up the global noc packet router and also global datastructures - initial_noc_routing({}); + initial_noc_routing({}, block_locs); SECTION("test_comp_noc_aggregate_bandwidth_cost") { //initialize all the cost calculator datastructures @@ -500,11 +502,12 @@ 
TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // get global datastructures auto& noc_ctx = g_vpr_ctx.mutable_noc(); auto& place_ctx = g_vpr_ctx.mutable_placement(); + auto& block_locs = place_ctx.mutable_block_locs(); // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - place_ctx.block_locs.clear(); + block_locs.clear(); // store the reference to device grid with // the grid width will be the size of the noc mesh @@ -592,7 +595,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); // now add the cluster and its placed location to the placement datastructures - place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); + block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); } // similar parameters for all traffic flows @@ -676,7 +679,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // assume this works // this is needed to set up the global noc packet router and also global datastructures - initial_noc_routing({}); + initial_noc_routing({}, block_locs); // datastructure below will store the bandwidth usages of all the links // and will be updated throughout this test. @@ -776,8 +779,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; // now move the blocks in the placement datastructures - place_ctx.block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; - place_ctx.block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; // get all the associated traffic flows of the moved cluster blocks const std::vector& assoc_traffic_flows_block_one = noc_ctx.noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(swap_router_block_one); @@ -866,7 +869,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ NocCostTerms delta_cost; // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs); // update the test noc cost terms based on the cost changes found by the test functions test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; @@ -932,8 +935,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location; // now move the blocks in the placement datastructures - place_ctx.block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; - place_ctx.block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; + block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc; + block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc; // get all the associated traffic flows of the moved cluster blocks const std::vector& assoc_traffic_flows_block_one = 
 noc_ctx.noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(swap_router_block_one);
@@ -1014,7 +1017,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     NocCostTerms delta_cost;
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs);
 
     // update the test noc cost terms based on the cost changes found by the test function
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
@@ -1068,8 +1071,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location;
 
     // now move the blocks in the placement datastructures
-    place_ctx.block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc;
-    place_ctx.block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc;
+    block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc;
+    block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc;
 
     // get all the associated traffic flows of the moved cluster blocks
     // remember that the first cluster block doesn't have any traffic flows associated to it
@@ -1115,7 +1118,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     delta_cost = NocCostTerms();
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs);
 
     // update the test noc cost terms based on the cost changes found by the test function
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
@@ -1171,8 +1174,8 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     router_where_cluster_is_placed[swap_router_block_two] = router_first_swap_cluster_location;
 
     // now move the blocks in the placement datastructures
-    place_ctx.block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc;
-    place_ctx.block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc;
+    block_locs[swap_router_block_one].loc = blocks_affected.moved_blocks[0].new_loc;
+    block_locs[swap_router_block_two].loc = blocks_affected.moved_blocks[1].new_loc;
 
     // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows
     //
@@ -1180,7 +1183,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_
     delta_cost = NocCostTerms();
 
     // call the test function
-    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost);
+    find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost, block_locs);
 
     // update the test noc cost terms based on the cost changes found by the test function
     test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth;
@@ -1377,11 +1380,12 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // get global datastructures
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
+    auto& block_locs = place_ctx.mutable_block_locs();
 
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    place_ctx.block_locs.clear();
+    block_locs.clear();
 
     // store the reference to the device grid;
     // the grid width will be the size of the noc mesh
@@ -1467,7 +1471,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number);
 
         // now add the cluster and its placed location to the placement datastructures
-        place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location);
+        block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location);
     }
 
     // similar parameters for all traffic flows
@@ -1540,7 +1544,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
 
     // assume this works
     // this is needed to set up the global noc packet router and also global datastructures
-    initial_noc_routing({});
+    initial_noc_routing({}, block_locs);
 
     // the datastructure below will store the bandwidth usages of all the links
     // and will be updated throughout this test.
@@ -1694,7 +1698,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // To undo this we just need to update the noc link bandwidths as if there was no swap (we do this by calling the test function)
     // This should then re-update the noc link bandwidths to their values before we imitated the swap above
     // The result is that the link bandwidths should match the golden link bandwidths, which never changed after the initial router block placement (at a point before block swapping)
-    revert_noc_traffic_flow_routes(blocks_affected);
+    revert_noc_traffic_flow_routes(blocks_affected, block_locs);
 
     // now verify that the test function worked correctly by comparing the noc link bandwidths to the golden link bandwidths
     int number_of_links = golden_link_bandwidths.size();
@@ -1728,11 +1732,12 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // get global datastructures
     auto& noc_ctx = g_vpr_ctx.mutable_noc();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
+    auto& block_locs = place_ctx.mutable_block_locs();
 
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    place_ctx.block_locs.clear();
+    block_locs.clear();
 
     // store the reference to the device grid;
     // the grid width will be the size of the noc mesh
@@ -1823,7 +1828,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number);
 
         // now add the cluster and its placed location to the placement datastructures
-        place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location);
+        block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location);
     }
 
     // similar parameters for all traffic flows
@@ -1952,7 +1957,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
 
     SECTION("Case where check place works after initial placement") {
         // run the test function
-        int error = check_noc_placement_costs(costs, error_tolerance, noc_opts);
+        int error = check_noc_placement_costs(costs, error_tolerance, noc_opts, block_locs);
 
         // we expect error to be 0 here, meaning the found costs are within the error tolerance of the noc golden costs
         REQUIRE(error == 0);
@@ -1974,7 +1979,7 @@
         }
 
         // run the test function
-        int error = check_noc_placement_costs(costs, error_tolerance, noc_opts);
+        int error = check_noc_placement_costs(costs, error_tolerance, noc_opts, block_locs);
 
         // we expect error to be 4 here, meaning the found costs are not within the tolerance range
         REQUIRE(error == 4);
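
Beyond the mechanical renames, these test hunks all follow the same refactoring pattern as the rest of the change: fetch the block-location container once through the placement context's accessor (place_ctx.mutable_block_locs() here, with a const block_locs() counterpart for read-only callers) and pass it explicitly to each NoC utility (initial_noc_routing, find_affected_noc_routers_and_update_noc_costs, revert_noc_traffic_flow_routes, check_noc_placement_costs), instead of having every callee reach into g_vpr_ctx on its own. The sketch below shows that accessor-plus-explicit-parameter pattern in isolation; it is not VPR code, and LocRegistry, t_block_loc, BlockId, and manhattan_span are simplified stand-ins invented for this example (VPR's real BlkLocRegistry and ClusterBlockId types are richer).

// Minimal standalone sketch (not VPR code) of the accessor/explicit-parameter pattern.
#include <cstdlib>
#include <iostream>
#include <unordered_map>

struct t_loc { int x = 0; int y = 0; };
struct t_block_loc { t_loc loc; };

using BlockId = int;
using BlockLocs = std::unordered_map<BlockId, t_block_loc>;

// Stand-in for the registry: the location container is a private member
// exposed only through const and mutable accessors, mirroring the
// block_locs() / mutable_block_locs() calls in the diff above.
class LocRegistry {
  public:
    const BlockLocs& block_locs() const { return block_locs_; }
    BlockLocs& mutable_block_locs() { return block_locs_; }

  private:
    BlockLocs block_locs_;
};

// A utility that previously would have read a global placement context
// now receives the locations it depends on as an argument.
int manhattan_span(const BlockLocs& block_locs, BlockId a, BlockId b) {
    const t_loc& loc_a = block_locs.at(a).loc;
    const t_loc& loc_b = block_locs.at(b).loc;
    return std::abs(loc_a.x - loc_b.x) + std::abs(loc_a.y - loc_b.y);
}

int main() {
    LocRegistry registry;

    // Fetch the mutable container once, as the tests do, then set up state.
    BlockLocs& block_locs = registry.mutable_block_locs();
    block_locs[0].loc = t_loc{1, 2};
    block_locs[1].loc = t_loc{4, 6};

    // Pass the container explicitly to the consumer.
    std::cout << manhattan_span(registry.block_locs(), 0, 1) << "\n"; // prints 7
    return 0;
}

The design point, visible in the tests above, is that the functions under test now name their placement-data dependency in their signatures, so a test (or any other caller) controls exactly which location container is read and mutated rather than relying on hidden global state.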