From 5b257cd5d01321c8224b6eba162c3250ccc3639c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 14 Nov 2024 16:46:52 -0500 Subject: [PATCH 01/32] typos --- libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp | 2 +- .../libtatum/libtatum/tatum/TimingConstraints.cpp | 2 +- .../libtatum/libtatum/tatum/TimingConstraints.hpp | 2 +- .../libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp | 2 +- .../tatum/graph_visitors/CommonAnalysisVisitor.hpp | 4 ++-- .../libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp | 4 ++-- .../tatum/graph_walkers/ParallelLevelizedWalker.hpp | 2 +- vpr/src/place/place.cpp | 9 ++++----- vpr/src/timing/concrete_timing_info.h | 2 +- 9 files changed, 14 insertions(+), 15 deletions(-) diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp index 85f50e9ac62..f17fba8a752 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp @@ -16,7 +16,7 @@ namespace tatum { /** \file * The 'SetupAnalysis' class defines the operations needed by a GraphWalker class - * to perform a setup (max/longest path) analysis. It satisifes and extends the GraphVisitor + * to perform a setup (max/longest path) analysis. It satisfies and extends the GraphVisitor * concept class. 
* * Setup Analysis Principles diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp index 510fadd9e51..8eeff58d50b 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp @@ -132,7 +132,7 @@ Time TimingConstraints::setup_constraint(const DomainId src_domain, const Domain return iter->second; } - //If no capture node specific constraint was found, fallback to the domain pair constriant + //If no capture node specific constraint was found, fallback to the domain pair constraint iter = setup_constraints_.find(NodeDomainPair(src_domain, sink_domain, NodeId::INVALID())); if(iter != setup_constraints_.end()) { return iter->second; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp index 07288ed08ba..225ac48f7d5 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp @@ -45,7 +45,7 @@ class TimingConstraints { ///\returns The source NodeId of the specified domain NodeId clock_domain_source_node(const DomainId id) const; - //\returns whether the specified domain id corresponds to a virtual lcock + //\returns whether the specified domain id corresponds to a virtual clock bool is_virtual_clock(const DomainId id) const; ///\returns The domain of the specified node id if it is a clock source diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp index 70a8bbe2758..7b7f0540891 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp @@ -9,7 +9,7 @@ namespace tatum { namespace detail { * * The operations for CommonAnalysisVisitor to 
perform setup analysis. * The setup analysis operations define that maximum edge delays are used, and that the - * maixmum arrival time (and minimum required times) are propagated through the timing graph. + * maximum arrival time (and minimum required times) are propagated through the timing graph. * * \see HoldAnalysisOps * \see SetupAnalysisOps diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp index 6b901b21def..82bbd8da30a 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp @@ -152,10 +152,10 @@ bool CommonAnalysisVisitor::do_arrival_pre_traverse_node(const Timi bool node_constrained = false; if(tc.node_is_constant_generator(node_id)) { - //We progpagate the tags from constant generators to ensure any sinks driven + //We propagate the tags from constant generators to ensure any sinks driven //only by constant generators are recorded as constrained. // - //We use a special tag to initialize constant generators which gets overritten + //We use a special tag to initialize constant generators which gets overwritten //by any non-constant tag at downstream nodes TimingTag const_gen_tag = ops_.const_gen_tag(); diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp index 313efa244d7..253b31af5ba 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp @@ -8,7 +8,7 @@ namespace tatum { namespace detail { * * The operations for CommonAnalysisVisitor to perform setup analysis. 
* The setup analysis operations define that maximum edge delays are used, and that the - * maixmum arrival time (and minimum required times) are propagated through the timing graph. + * maximum arrival time (and minimum required times) are propagated through the timing graph. * * \see HoldAnalysisOps * \see CommonAnalysisVisitor @@ -121,7 +121,7 @@ class SetupAnalysisOps : public CommonAnalysisOps { Time calculate_slack(const Time required_time, const Time arrival_time) { //Setup requires the arrival to occur *before* the required time, so //slack is the amount of required time left after the arrival time; meaning - //we we subtract the arrival time from the required time to get the setup slack + //we subtract the arrival time from the required time to get the setup slack return required_time - arrival_time; } diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp index 0cbf1a5863b..0104d10d3e3 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp @@ -11,7 +11,7 @@ namespace tatum { /** - * A parallel timing analyzer which traveres the timing graph in a levelized + * A parallel timing analyzer which traverses the timing graph in a levelized * manner. However nodes within each level are processed in parallel using * Thread Building Blocks (TBB). If TBB is not available it operates serially and is * equivalent to the SerialWalker. diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 9af234a13f8..4e463ae0e0e 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -145,11 +145,10 @@ void try_place(const Netlist<>& net_list, * if is_flat is false, even if is_flat is set to true from the command line. 
*/ VTR_ASSERT(!is_flat); - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - auto& timing_ctx = g_vpr_ctx.timing(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc); diff --git a/vpr/src/timing/concrete_timing_info.h b/vpr/src/timing/concrete_timing_info.h index 9aaae0d82ff..ce02e2abe90 100644 --- a/vpr/src/timing/concrete_timing_info.h +++ b/vpr/src/timing/concrete_timing_info.h @@ -454,7 +454,7 @@ class ConstantTimingInfo : public SetupHoldTimingInfo { /** Create a SetupTimingInfo for the given delay calculator */ template std::unique_ptr make_setup_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { - auto& timing_ctx = g_vpr_ctx.timing(); + const auto& timing_ctx = g_vpr_ctx.timing(); std::shared_ptr analyzer; From bae8502eba7c3c7fe7a5e2beb5a704dd995a06c6 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 16 Nov 2024 15:21:32 -0500 Subject: [PATCH 02/32] partially added Placer class --- vpr/src/base/clustered_netlist_utils.h | 1 + vpr/src/place/place.cpp | 344 +------------------------ vpr/src/place/place_delay_model.cpp | 2 +- vpr/src/place/place_delay_model.h | 2 +- vpr/src/place/placer.cpp | 281 ++++++++++++++++++++ vpr/src/place/placer.h | 74 ++++++ vpr/src/place/timing_place_lookup.cpp | 10 +- vpr/src/place/timing_place_lookup.h | 2 +- vpr/src/util/vpr_utils.h | 1 + 9 files changed, 377 insertions(+), 340 deletions(-) create mode 100644 vpr/src/place/placer.cpp create mode 100644 vpr/src/place/placer.h diff --git a/vpr/src/base/clustered_netlist_utils.h b/vpr/src/base/clustered_netlist_utils.h index 52688f88e47..b5d1504ed91 100644 --- a/vpr/src/base/clustered_netlist_utils.h +++ 
b/vpr/src/base/clustered_netlist_utils.h @@ -14,6 +14,7 @@ class ClusteredPinAtomPinsLookup { typedef typename vtr::Range atom_pin_range; public: + ClusteredPinAtomPinsLookup() = default; ClusteredPinAtomPinsLookup(const ClusteredNetlist& clustered_netlist, const AtomNetlist& atom_netlist, const IntraLbPbPinLookup& pb_gpin_lookup); atom_pin_range connected_atom_pins(ClusterPinId clustered_pin) const; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 4e463ae0e0e..f7e31bc5bb3 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -53,7 +53,7 @@ #include "net_cost_handler.h" #include "placer_state.h" - +#include "placer.h" /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE @@ -69,30 +69,8 @@ void print_clb_placement(const char* fname); static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, const RRGraphView& rr_graph); -static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::vector& directs, - PlacerState& placer_state, - std::optional& noc_cost_handler); - static void free_placement_structs(); -static void check_place(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - const std::optional& noc_cost_handler); - -static int check_placement_costs(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - PlacerState& placer_state, - NetCostHandler& net_cost_handler); - static int count_connections(); static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, @@ -151,25 +129,15 @@ void try_place(const Netlist<>& net_list, const auto& timing_ctx = g_vpr_ctx.timing(); auto 
pre_place_timing_stats = timing_ctx.stats; - t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc); - tatum::TimingPathInfo critical_path; float sTNS = NAN; float sWNS = NAN; char msg[vtr::bufsize]; - t_placement_checkpoint placement_checkpoint; - - std::shared_ptr timing_info; - std::shared_ptr placement_delay_calc; - std::unique_ptr place_delay_model; - std::unique_ptr placer_setup_slacks; - std::unique_ptr placer_criticalities; - std::unique_ptr pin_timing_invalidator; - - t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size()); - + /* Placement delay model is independent of the placement and can be shared across + * multiple placers. So, it is created and initialized once. */ + std::shared_ptr place_delay_model; if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_delay_model(net_list, @@ -195,213 +163,26 @@ void try_place(const Netlist<>& net_list, int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); - PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb); - auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); - const auto& p_timing_ctx = placer_state.timing(); - const auto& p_runtime_ctx = placer_state.runtime(); - - vtr::RngContainer rng(placer_opts.seed); + auto& place_ctx = g_vpr_ctx.mutable_placement(); + place_ctx.lock_loc_vars(); + place_ctx.compressed_block_grids = create_compressed_block_grids(); - std::optional noc_cost_handler; - // create cost handler objects - NetCostHandler net_cost_handler = alloc_and_load_placement_structs(placer_opts, noc_opts, directs, - placer_state, noc_cost_handler); + Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb); #ifndef NO_GRAPHICS - if (noc_cost_handler.has_value()) { - 
get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler->get_link_bandwidth_usages()); + if (placer.noc_cost_handler_.has_value()) { + get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler_->get_link_bandwidth_usages()); } #endif - vtr::ScopedStartFinishTimer timer("Placement"); - - if (noc_opts.noc) { - normalize_noc_cost_weighting_factor(const_cast(noc_opts)); - } - - initial_placement(placer_opts, placer_opts.constraints_file.c_str(), - noc_opts, blk_loc_registry, noc_cost_handler, rng); - - //create the move generator based on the chosen strategy - auto [move_generator, move_generator2] = create_move_generators(placer_state, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng); - - if (!placer_opts.write_initial_place_file.empty()) { - print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state.block_locs()); - } - -#ifdef ENABLE_ANALYTIC_PLACE - /* - * Analytic Placer: - * Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on - * both the clb_netlist and the gird. 
- * Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench - */ - if (placer_opts.enable_analytic_placer) { - AnalyticPlacer{blk_loc_registry}.ap_place(); - } - -#endif /* ENABLE_ANALYTIC_PLACE */ - - // Update physical pin values - for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { - blk_loc_registry.place_sync_external_block_connections(block_id); - } - const int width_fac = placer_opts.place_chan_width; - init_draw_coords((float)width_fac, blk_loc_registry); - - /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ - IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); - //Enables fast look-up of atom pins connect to CLB pins - ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup); - - /* Gets initial cost and loads bounding boxes. */ - - if (placer_opts.place_algorithm.is_timing_driven()) { - costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - - int num_connections = count_connections(); - VTR_LOG("\n"); - VTR_LOG("There are %d point to point connections in this circuit.\n", - num_connections); - VTR_LOG("\n"); - - //Update the point-to-point delays from the initial placement - comp_td_connection_delays(place_delay_model.get(), placer_state); - - /* - * Initialize timing analysis - */ - // For placement, we don't use flat-routing - placement_delay_calc = std::make_shared(atom_ctx.nlist, - atom_ctx.lookup, - p_timing_ctx.connection_delay, - is_flat); - placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin); - placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin); - - timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); - - placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); - - placer_criticalities = std::make_unique(cluster_ctx.clb_nlist, 
netlist_pin_lookup); - - pin_timing_invalidator = make_net_pin_timing_invalidator( - placer_opts.timing_update_type, - net_list, - netlist_pin_lookup, - atom_ctx.nlist, - atom_ctx.lookup, - *timing_info->timing_graph(), - is_flat); - - //First time compute timing and costs, compute from scratch - PlaceCritParams crit_params; - crit_params.crit_exponent = placer_opts.td_place_exp_first; - crit_params.crit_limit = placer_opts.place_crit_limit; - - initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), &costs, placer_state); - - critical_path = timing_info->least_slack_critical_path(); - - /* Write out the initial timing echo file */ - if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo( - getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc, timing_info->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); - - write_setup_timing_graph_dot( - getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) - + std::string(".dot"), - *timing_info, debug_tnode); - } - - /* Initialize the normalization factors. 
Calling costs.update_norm_factors() * - * here would fail the golden results of strong_sdc benchmark */ - costs.timing_cost_norm = 1 / costs.timing_cost; - costs.bb_cost_norm = 1 / costs.bb_cost; - } else { - VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); - - /* Total cost is the same as wirelength cost normalized*/ - costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - costs.bb_cost_norm = 1 / costs.bb_cost; - - /* Timing cost and normalization factors are not used */ - constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); - costs.timing_cost = INVALID_COST; - costs.timing_cost_norm = INVALID_COST; - } - - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - - // get the costs associated with the NoC - costs.noc_cost_terms.aggregate_bandwidth = noc_cost_handler->comp_noc_aggregate_bandwidth_cost(); - std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = noc_cost_handler->comp_noc_latency_cost(); - costs.noc_cost_terms.congestion = noc_cost_handler->comp_noc_congestion_cost(); - - // initialize all the noc normalization factors - noc_cost_handler->update_noc_normalization_factors(costs); - } - - // set the starting total placement cost - costs.cost = costs.get_total_cost(placer_opts, noc_opts); - - //Sanity check that initial placement is legal - check_place(costs, - place_delay_model.get(), - placer_criticalities.get(), - placer_opts.place_algorithm, - noc_opts, - placer_state, - net_cost_handler, - noc_cost_handler); - - //Initial placement statistics - VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, - costs.bb_cost, costs.timing_cost); - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - - noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); - } - if (placer_opts.place_algorithm.is_timing_driven()) { - VTR_LOG( - "Initial placement estimated Critical Path Delay (CPD): %g ns\n", - 1e9 * 
critical_path.delay()); - VTR_LOG( - "Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", - 1e9 * timing_info->setup_total_negative_slack()); - VTR_LOG( - "Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", - 1e9 * timing_info->setup_worst_negative_slack()); - VTR_LOG("\n"); - - VTR_LOG("Initial placement estimated setup slack histogram:\n"); - print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); - } - - size_t num_macro_members = 0; - for (auto& macro : blk_loc_registry.place_macros().macros()) { - num_macro_members += macro.members.size(); - } - VTR_LOG( - "Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", - blk_loc_registry.place_macros().macros().size(), num_macro_members, - float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); - VTR_LOG("\n"); + init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry()); sprintf(msg, "Initial Placement. Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", costs.cost, costs.bb_cost, costs.timing_cost, width_fac); - //Draw the initial placement + // Draw the initial placement update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); if (placer_opts.placement_saves_per_temperature >= 1) { @@ -525,8 +306,6 @@ void try_place(const Netlist<>& net_list, print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - // TODO: - // 1. add some subroutine hierarchy! Too big! //#ifdef VERBOSE // if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { @@ -668,28 +447,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, return cube_bb; } -/* Allocates the major structures needed only by the placer, primarily for * - * computing costs quickly and such. 
*/ -static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::vector& directs, - PlacerState& placer_state, - std::optional& noc_cost_handler) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - place_ctx.lock_loc_vars(); - - init_placement_context(placer_state.mutable_blk_loc_registry(), directs); - - place_ctx.compressed_block_grids = create_compressed_block_grids(); - - if (noc_opts.noc) { - noc_cost_handler.emplace(placer_state.block_locs()); - } - - return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb}; -} - /* Frees the major structures needed by the placer (and not needed * * elsewhere). */ static void free_placement_structs() { @@ -697,83 +454,6 @@ static void free_placement_structs() { vtr::release_memory(place_ctx.compressed_block_grids); } -static void check_place(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - const std::optional& noc_cost_handler) { - /* Checks that the placement has not confused our data structures. * - * i.e. the clb and block structures agree about the locations of * - * every block, blocks are in legal spots, etc. Also recomputes * - * the final placement cost from scratch and makes sure it is * - * within roundoff of what we think the cost is. */ - const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - const DeviceGrid& device_grid = g_vpr_ctx.device().grid; - const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints; - - int error = 0; - - // Verify the placement invariants independent to the placement flow. 
- error += verify_placement(placer_state.blk_loc_registry(), - clb_nlist, - device_grid, - cluster_constraints); - - error += check_placement_costs(costs, delay_model, criticalities, place_algorithm, placer_state, net_cost_handler); - - if (noc_opts.noc) { - // check the NoC costs during placement if the user is using the NoC supported flow - error += noc_cost_handler->check_noc_placement_costs(costs, PL_INCREMENTAL_COST_TOLERANCE, noc_opts); - // make sure NoC routing configuration does not create any cycles in CDG - error += (int)noc_cost_handler->noc_routing_has_cycle(); - } - - if (error == 0) { - VTR_LOG("\n"); - VTR_LOG("Completed placement consistency check successfully.\n"); - - } else { - VPR_ERROR(VPR_ERROR_PLACE, - "\nCompleted placement consistency check, %d errors found.\n" - "Aborting program.\n", - error); - } -} - -static int check_placement_costs(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - PlacerState& placer_state, - NetCostHandler& net_cost_handler) { - int error = 0; - double timing_cost_check; - - double bb_cost_check = net_cost_handler.comp_bb_cost(e_cost_methods::CHECK); - - if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { - VTR_LOG_ERROR( - "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", - bb_cost_check, costs.bb_cost); - error++; - } - - if (place_algorithm.is_timing_driven()) { - comp_td_costs(delay_model, *criticalities, placer_state, &timing_cost_check); - //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); - if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { - VTR_LOG_ERROR( - "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", - timing_cost_check, costs.timing_cost); - error++; - } - } - return error; -} - #ifdef VERBOSE void print_clb_placement(const char* fname) { /* Prints 
out the clb placements to a file. */ diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index 4f626a5817f..36070bf8423 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -318,7 +318,7 @@ void OverrideDelayModel::write(const std::string& file) const { #endif ///@brief Initialize the placer delay model. -std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, +std::shared_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, const t_placer_opts& placer_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index 0aa01385e6e..d1cd3c2164a 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -29,7 +29,7 @@ class PlaceDelayModel; class PlacerState; ///@brief Initialize the placer delay model. -std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, +std::shared_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, const t_placer_opts& place_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp new file mode 100644 index 00000000000..d0b59b7a17a --- /dev/null +++ b/vpr/src/place/placer.cpp @@ -0,0 +1,281 @@ + +#include "placer.h" + +#include "vtr_time.h" +#include "read_place.h" +#include "analytic_placer.h" +#include "initial_placement.h" +#include "concrete_timing_info.h" +#include "tatum/echo_writer.hpp" +#include "verify_placement.h" +#include "place_timing_update.h" + +Placer::Placer(const Netlist<>& net_list, + const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const t_noc_opts& noc_opts, + const std::vector& directs, + std::shared_ptr place_delay_model, + bool cube_bb) + : placer_opts_(placer_opts) + , noc_opts_(noc_opts) + , costs_(placer_opts.place_algorithm, noc_opts.noc) + , 
placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb) + , rng_(placer_opts.seed) + , net_cost_handler_(placer_opts, placer_state_, cube_bb) + , place_delay_model_(place_delay_model){ + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& atom_ctx = g_vpr_ctx.atom(); + + init_placement_context(placer_state_.mutable_blk_loc_registry(), directs); + + // create a NoC cost handler if NoC optimization is enabled + if (noc_opts.noc) { + noc_cost_handler_.emplace(placer_state_.block_locs()); + } + + // Start measuring placement time + timer_ = std::make_unique("Placement"); + + /* To make sure the importance of NoC-related cost terms compared to + * BB and timing cost is determined only through NoC placement weighting factor, + * we normalize NoC-related cost weighting factors so that they add up to 1. + * With this normalization, NoC-related cost weighting factors only determine + * the relative importance of NoC cost terms with respect to each other, while + * the importance of total NoC cost to conventional placement cost is determined + * by NoC placement weighting factor. 
+ */ + if (noc_opts.noc) { + normalize_noc_cost_weighting_factor(const_cast(noc_opts)); + } + + + BlkLocRegistry& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); + initial_placement(placer_opts, placer_opts.constraints_file.c_str(), + noc_opts, blk_loc_registry, noc_cost_handler_, rng_); + + //create the move generator based on the chosen placement strategy +// auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_); + + if (!placer_opts.write_initial_place_file.empty()) { + print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state_.block_locs()); + } + +#ifdef ENABLE_ANALYTIC_PLACE + /* + * Analytic Placer: + * Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on + * both the clb_netlist and the grid. + * Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench + */ + if (placer_opts.enable_analytic_placer) { + AnalyticPlacer{blk_loc_registry}.ap_place(); + } + +#endif /* ENABLE_ANALYTIC_PLACE */ + + // Update physical pin values + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + blk_loc_registry.place_sync_external_block_connections(block_id); + } + + // Allocate here because it goes into timing critical code where each memory allocation is expensive + pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types); + // Enables fast look-up of atom pins connected to CLB pins + netlist_pin_lookup_ = ClusteredPinAtomPinsLookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup_); + + // Gets initial cost and loads bounding boxes. 
+ costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL); + costs_.bb_cost_norm = 1 / costs_.bb_cost; + + if (placer_opts.place_algorithm.is_timing_driven()) { + alloc_and_init_timing_objects_(net_list, analysis_opts); + } else { + VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); + // Timing cost and normalization factors are not used + constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); + costs_.timing_cost = INVALID_COST; + costs_.timing_cost_norm = INVALID_COST; + } + + if (noc_opts.noc) { + VTR_ASSERT(noc_cost_handler_.has_value()); + + // get the costs associated with the NoC + costs_.noc_cost_terms.aggregate_bandwidth = noc_cost_handler_->comp_noc_aggregate_bandwidth_cost(); + std::tie(costs_.noc_cost_terms.latency, costs_.noc_cost_terms.latency_overrun) = noc_cost_handler_->comp_noc_latency_cost(); + costs_.noc_cost_terms.congestion = noc_cost_handler_->comp_noc_congestion_cost(); + + // initialize all the noc normalization factors + noc_cost_handler_->update_noc_normalization_factors(costs_); + } + + // set the starting total placement cost + costs_.cost = costs_.get_total_cost(placer_opts, noc_opts); +} + +void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, + const t_analysis_opts& analysis_opts) { + const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& p_timing_ctx = placer_state_.timing(); + + // Update the point-to-point delays from the initial placement + comp_td_connection_delays(place_delay_model_.get(), placer_state_); + + // Initialize timing analysis + placement_delay_calc_ = std::make_shared(atom_ctx.nlist, + atom_ctx.lookup, + p_timing_ctx.connection_delay, + /*is_flat=*/false); + placement_delay_calc_->set_tsu_margin_relative(placer_opts_.tsu_rel_margin); + placement_delay_calc_->set_tsu_margin_absolute(placer_opts_.tsu_abs_margin); + + timing_info_ = 
make_setup_timing_info(placement_delay_calc_, placer_opts_.timing_update_type); + + placer_setup_slacks_ = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup_); + + placer_criticalities_ = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup_); + + pin_timing_invalidator_ = make_net_pin_timing_invalidator(placer_opts_.timing_update_type, + net_list, + netlist_pin_lookup_, + atom_ctx.nlist, + atom_ctx.lookup, + *timing_info_->timing_graph(), + /*is_flat=*/false); + + // First time compute timing and costs, compute from scratch + PlaceCritParams crit_params; + crit_params.crit_exponent = placer_opts_.td_place_exp_first; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + initialize_timing_info(crit_params, place_delay_model_.get(), placer_criticalities_.get(), + placer_setup_slacks_.get(), pin_timing_invalidator_.get(), + timing_info_.get(), &costs_, placer_state_); + + critical_path_ = timing_info_->least_slack_critical_path(); + + // Write out the initial timing echo file + if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) { + tatum::write_echo( + getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placement_delay_calc_, timing_info_->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); + + write_setup_timing_graph_dot( + getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + + std::string(".dot"), + *timing_info_, debug_tnode); + } + + costs_.timing_cost_norm = 1 / costs_.timing_cost; + + // Sanity check that initial placement is legal + check_place_(); + + print_initial_placement_stats_(); + +#ifndef ENABLE_ANALYTIC_PLACE + annealer_ = std::make_unique(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, + noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(), + placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), 
pin_timing_invalidator.get(), + move_lim); +#endif +} + +void Placer::check_place_() { + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + const DeviceGrid& device_grid = g_vpr_ctx.device().grid; + const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints; + + int error = 0; + + // Verify the placement invariants independent to the placement flow. + error += verify_placement(placer_state_.blk_loc_registry(), + clb_nlist, + device_grid, + cluster_constraints); + + error += check_placement_costs_(); + + if (noc_opts_.noc) { + // check the NoC costs during placement if the user is using the NoC supported flow + error += noc_cost_handler_->check_noc_placement_costs(costs_, PL_INCREMENTAL_COST_TOLERANCE, noc_opts_); + // make sure NoC routing configuration does not create any cycles in CDG + error += (int)noc_cost_handler_->noc_routing_has_cycle(); + } + + if (error == 0) { + VTR_LOG("\n"); + VTR_LOG("Completed placement consistency check successfully.\n"); + + } else { + VPR_ERROR(VPR_ERROR_PLACE, + "\nCompleted placement consistency check, %d errors found.\n" + "Aborting program.\n", + error); + } +} + +int Placer::check_placement_costs_() { + int error = 0; + double timing_cost_check; + + double bb_cost_check = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK); + + if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { + VTR_LOG_ERROR( + "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", + bb_cost_check, costs_.bb_cost); + error++; + } + + if (placer_opts_.place_algorithm.is_timing_driven()) { + comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check); + //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); + if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { + VTR_LOG_ERROR( + "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", + 
timing_cost_check, costs_.timing_cost); + error++; + } + } + return error; +} + +void Placer::print_initial_placement_stats_() { + VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", + costs_.cost, costs_.bb_cost, costs_.timing_cost); + + if (noc_opts_.noc) { + VTR_ASSERT(noc_cost_handler_.has_value()); + noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs_, noc_opts_); + } + + if (placer_opts_.place_algorithm.is_timing_driven()) { + VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", + 1e9 * critical_path_.delay()); + VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", + 1e9 * timing_info_->setup_total_negative_slack()); + VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", + 1e9 * timing_info_->setup_worst_negative_slack()); + VTR_LOG("\n"); + VTR_LOG("Initial placement estimated setup slack histogram:\n"); + print_histogram(create_setup_slack_histogram(*timing_info_->setup_analyzer())); + } + + const BlkLocRegistry& blk_loc_registry = placer_state_.blk_loc_registry(); + size_t num_macro_members = 0; + for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) { + num_macro_members += macro.members.size(); + } + VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", + blk_loc_registry.place_macros().macros().size(), num_macro_members, + float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); + VTR_LOG("\n"); +} \ No newline at end of file diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h new file mode 100644 index 00000000000..57ebce2a8a2 --- /dev/null +++ b/vpr/src/place/placer.h @@ -0,0 +1,74 @@ + + +#ifndef VTR_PLACER_H +#define VTR_PLACER_H + +#include +#include + +#include "timing_place.h" +#include "place_checkpoint.h" +#include "PlacementDelayCalculator.h" +#include "placer_state.h" +#include "noc_place_utils.h" +#include "net_cost_handler.h" + + +class 
Placer { + public: + Placer(const Netlist<>& net_list, + const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const t_noc_opts& noc_opts, + const std::vector& directs, + std::shared_ptr place_delay_model, + bool cube_bb); + + + //TODO: make this private + public: + const t_placer_opts& placer_opts_; + const t_noc_opts& noc_opts_; + t_placer_costs costs_; + PlacerState placer_state_; + vtr::RngContainer rng_; + NetCostHandler net_cost_handler_; + std::optional noc_cost_handler_; + std::shared_ptr place_delay_model_; + + t_placement_checkpoint placement_checkpoint_; + + std::shared_ptr timing_info_; + std::shared_ptr placement_delay_calc_; + std::unique_ptr placer_setup_slacks_; + std::unique_ptr placer_criticalities_; + std::unique_ptr pin_timing_invalidator_; + tatum::TimingPathInfo critical_path_; + + + std::unique_ptr timer_; + + IntraLbPbPinLookup pb_gpin_lookup_; + ClusteredPinAtomPinsLookup netlist_pin_lookup_; + + std::unique_ptr annealer_; + + private: + void alloc_and_init_timing_objects_(const Netlist<>& net_list, + const t_analysis_opts& analysis_opts); + + /** + * Checks that the placement has not confused our data structures. + * i.e. the clb and block structures agree about the locations of + * every block, blocks are in legal spots, etc. Also recomputes + * the final placement cost from scratch and makes sure it is + * within round-off of what we think the cost is. 
+ */ + void check_place_(); + + int check_placement_costs_(); + + void print_initial_placement_stats_(); +}; + +#endif //VTR_PLACER_H diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 86dc396e2b8..873633a9c5e 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -170,7 +170,7 @@ static float find_neighboring_average(vtr::NdMatrix& matrix, /******* Globally Accessible Functions **********/ -std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, +std::shared_ptr compute_place_delay_model(const t_placer_opts& placer_opts, const t_router_opts& router_opts, const Netlist<>& net_list, t_det_routing_arch* det_routing_arch, @@ -196,15 +196,15 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& int longest_length = get_longest_segment_length(segment_inf); /*now setup and compute the actual arrays */ - std::unique_ptr place_delay_model; + std::shared_ptr place_delay_model; float min_cross_layer_delay = get_min_cross_layer_delay(); if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { - place_delay_model = std::make_unique(); + place_delay_model = std::make_shared(); } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + place_delay_model = std::make_shared(min_cross_layer_delay, is_flat); } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + place_delay_model = std::make_shared(min_cross_layer_delay, is_flat); } else { VTR_ASSERT_MSG(false, "Invalid placer delay model"); } diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h index fba3f470483..14897a7fcc4 100644 --- a/vpr/src/place/timing_place_lookup.h +++ b/vpr/src/place/timing_place_lookup.h @@ -2,7 +2,7 @@ #define TIMING_PLACE_LOOKUP_H #include 
"place_delay_model.h" -std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, +std::shared_ptr compute_place_delay_model(const t_placer_opts& placer_opts, const t_router_opts& router_opts, const Netlist<>& net_list, t_det_routing_arch* det_routing_arch, diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 6a4d97aefa1..8869cc55ddd 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -136,6 +136,7 @@ std::string rr_node_arch_name(RRNodeId inode, bool is_flat); //Class for looking up pb graph pins from block pin indices class IntraLbPbPinLookup { public: + IntraLbPbPinLookup() = default; IntraLbPbPinLookup(const std::vector& block_types); IntraLbPbPinLookup(const IntraLbPbPinLookup& rhs); IntraLbPbPinLookup& operator=(IntraLbPbPinLookup rhs); From 2298704f9d6731338177ec07d4fa46dcf510c689 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 10:55:21 -0500 Subject: [PATCH 03/32] add place_log_util.h/.cpp --- .../librrgraph/src/base/rr_spatial_lookup.cpp | 2 +- vpr/src/base/read_options.cpp | 2 +- vpr/src/place/place_log_util.cpp | 119 ++++++++++++++++++ vpr/src/place/place_log_util.h | 29 +++++ vpr/src/route/router_lookahead_map.cpp | 14 +-- 5 files changed, 157 insertions(+), 9 deletions(-) create mode 100644 vpr/src/place/place_log_util.cpp create mode 100644 vpr/src/place/place_log_util.h diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp index 6959659be8b..3b23d7d49e9 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp +++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp @@ -163,7 +163,7 @@ std::vector RRSpatialLookup::find_nodes(int layer, nodes.reserve(num_nodes); for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { - nodes.push_back(RRNodeId(node)); + nodes.emplace_back(node); } } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 
1641e255b89..f789f848808 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2256,7 +2256,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_delay_model_reducer, "--place_delay_model_reducer") - .help("When calculating delta delays for the placment delay model how are multiple values combined?") + .help("When calculating delta delays for the placement delay model how are multiple values combined?") .default_value("min") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp new file mode 100644 index 00000000000..d935f474d0a --- /dev/null +++ b/vpr/src/place/place_log_util.cpp @@ -0,0 +1,119 @@ +#include "place_log_util.h" + +#include "vtr_log.h" +#include "annealer.h" +#include "place_util.h" + +void print_place_status_header(bool noc_enabled) { + VTR_LOG("\n"); + if (!noc_enabled) { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + } else { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha Agg. BW Agg. Lat Lat Over. 
NoC Cong.\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) (bps) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + } +} + +void print_place_status(const t_annealing_state& state, + const t_placer_statistics& stats, + float elapsed_sec, + float cpd, + float sTNS, + float sWNS, + size_t tot_moves, + bool noc_enabled, + const NocCostTerms& noc_cost_terms) { + VTR_LOG( + "%4zu %6.1f %7.1e " + "%7.3f %10.2f %-10.5g " + "%7.3f % 10.3g % 8.3f " + "%7.3f %7.4f %6.1f %8.2f", + state.num_temps, elapsed_sec, state.t, + stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, + 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, + stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent); + + pretty_print_uint(" ", tot_moves, 9, 3); + + VTR_LOG(" %6.3f", state.alpha); + + if (noc_enabled) { + VTR_LOG( + " %7.2e %7.2e" + " %8.2e %8.2f", + noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency, + noc_cost_terms.latency_overrun, noc_cost_terms.congestion); + } + + VTR_LOG("\n"); + fflush(stdout); +} + +void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& block_locs = blk_loc_registry.block_locs(); + + size_t max_block_name = 0; + size_t max_tile_name = 0; + + //Record the resource requirement + std::map num_type_instances; + std::map> num_placed_instances; + + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + const t_pl_loc& loc = block_locs[blk_id].loc; + + t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); + t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id); + + num_type_instances[logical_block]++; + num_placed_instances[logical_block][physical_tile]++; + + max_block_name = std::max(max_block_name, 
logical_block->name.length()); + max_tile_name = std::max(max_tile_name, physical_tile->name.length()); + } + + VTR_LOG("\n"); + VTR_LOG("Placement resource usage:\n"); + for (const auto [logical_block_type_ptr, _] : num_type_instances) { + for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) { + VTR_LOG(" %-*s implemented as %-*s: %d\n", max_block_name, + logical_block_type_ptr->name.c_str(), max_tile_name, + physical_tile_type_ptr->name.c_str(), num_instances); + } + } + VTR_LOG("\n"); +} + +void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) { + size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; + VTR_ASSERT(total_swap_attempts > 0); + + size_t num_swap_print_digits = ceil(log10(total_swap_attempts)); + float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; + float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; + float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; + VTR_LOG("Placement number of temperatures: %d\n", state.num_temps); + VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, + total_swap_attempts); + VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_accepted, 100 * accept_rate); + VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_rejected, 100 * reject_rate); + VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_aborted, 100 * abort_rate); +} \ No newline at end of file diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h new file mode 100644 index 00000000000..22a2bbd9b03 --- /dev/null +++ b/vpr/src/place/place_log_util.h @@ -0,0 +1,29 @@ + +#ifndef VTR_PLACE_LOG_UTIL_H +#define VTR_PLACE_LOG_UTIL_H + +#include + +class t_annealing_state; +class 
t_placer_statistics; +struct NocCostTerms; +struct t_swap_stats; +class BlkLocRegistry; + +void print_place_status_header(bool noc_enabled); + +void print_place_status(const t_annealing_state& state, + const t_placer_statistics& stats, + float elapsed_sec, + float cpd, + float sTNS, + float sWNS, + size_t tot_moves, + bool noc_enabled, + const NocCostTerms& noc_cost_terms); + +void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); + +void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); + +#endif //VTR_PLACE_LOG_UTIL_H diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 88cd75d46c8..a3d468b901c 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -507,12 +507,12 @@ static void compute_router_wire_lookahead(const std::vector& segm auto& grid = device_ctx.grid; //Re-allocate - f_wire_cost_map = t_wire_cost_map({static_cast(grid.get_num_layers()), - static_cast(grid.get_num_layers()), - 2, - segment_inf_vec.size(), - device_ctx.grid.width(), - device_ctx.grid.height()}); + f_wire_cost_map = t_wire_cost_map({static_cast(grid.get_num_layers()), + static_cast(grid.get_num_layers()), + 2, + segment_inf_vec.size(), + device_ctx.grid.width(), + device_ctx.grid.height()}); int longest_seg_length = 0; for (const auto& seg_inf : segment_inf_vec) { @@ -536,7 +536,7 @@ static void compute_router_wire_lookahead(const std::vector& segm chan_type, segment_inf, std::unordered_map>(), - true); + /*sample_all_locs=*/true); if (routing_cost_map.empty()) { continue; } From afc789b2302592ddb485c9ea389c93c7c937ca19 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 12:03:27 -0500 Subject: [PATCH 04/32] added place() method to Placer class --- vpr/src/place/place.cpp | 307 +---------------------------- vpr/src/place/place_checkpoint.cpp | 2 +- vpr/src/place/place_checkpoint.h | 2 +- vpr/src/place/placer.cpp | 158 
++++++++++++++- vpr/src/place/placer.h | 1 + 5 files changed, 168 insertions(+), 302 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f7e31bc5bb3..8344bfd5ff9 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -80,22 +80,6 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, bool is_flat, const BlkLocRegistry& blk_loc_registry); -static void print_place_status_header(bool noc_enabled); - -static void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms); - -static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); - -static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); - /** * @brief Copies the placement location variables into the global placement context. * @param blk_loc_registry The placement location variables to be copied. @@ -129,10 +113,6 @@ void try_place(const Netlist<>& net_list, const auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - - float sTNS = NAN; - float sWNS = NAN; - char msg[vtr::bufsize]; /* Placement delay model is independent of the placement and can be shared across @@ -192,141 +172,6 @@ void try_place(const Netlist<>& net_list, print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - bool skip_anneal = false; - -#ifdef ENABLE_ANALYTIC_PLACE - // Analytic placer: When enabled, skip most of the annealing and go straight to quench - // TODO: refactor goto label. 
- if (placer_opts.enable_analytic_placer) { - skip_anneal = true; - } -#endif /* ENABLE_ANALYTIC_PLACE */ - - PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, - noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); - - const t_annealing_state& annealing_state = annealer.get_annealing_state(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); - - if (!skip_anneal) { - //Table header - VTR_LOG("\n"); - print_place_status_header(noc_opts.noc); - - /* Outer loop of the simulated annealing begins */ - do { - vtr::Timer temperature_timer; - - annealer.outer_loop_update_timing_info(); - - if (placer_opts.place_algorithm.is_timing_driven()) { - critical_path = timing_info->least_slack_critical_path(); - sTNS = timing_info->setup_total_negative_slack(); - sWNS = timing_info->setup_worst_negative_slack(); - - // see if we should save the current placement solution as a checkpoint - if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { - save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), - placement_checkpoint, - timing_info, costs, critical_path.delay()); - } - } - - // do a complete inner loop iteration - annealer.placement_inner_loop(); - - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), - noc_opts.noc, costs.noc_cost_terms); - - sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", - costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); - update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); - - //#ifdef VERBOSE - // if (getEchoEnabled()) { - // print_clb_placement("first_iteration_clb_placement.echo"); - // } - //#endif - } while 
(annealer.outer_loop_update_state()); - /* Outer loop of the simulated annealing ends */ - } //skip_anneal ends - - // Start Quench - annealer.start_quench(); - - auto pre_quench_timing_stats = timing_ctx.stats; - { /* Quench */ - - vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - - annealer.outer_loop_update_timing_info(); - - /* Run inner loop again with temperature = 0 so as to accept only swaps - * which reduce the cost of the placement */ - annealer.placement_inner_loop(); - - if (placer_opts.place_quench_algorithm.is_timing_driven()) { - critical_path = timing_info->least_slack_critical_path(); - sTNS = timing_info->setup_total_negative_slack(); - sWNS = timing_info->setup_worst_negative_slack(); - } - - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), - noc_opts.noc, costs.noc_cost_terms); - } - auto post_quench_timing_stats = timing_ctx.stats; - - //Final timing analysis - PlaceCritParams crit_params; - crit_params.crit_exponent = annealing_state.crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - if (placer_opts.place_algorithm.is_timing_driven()) { - perform_full_timing_update(crit_params, place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), &costs, placer_state); - VTR_LOG("post-quench CPD = %g (ns) \n", - 1e9 * timing_info->least_slack_critical_path().delay()); - } - - //See if our latest checkpoint is better than the current placement solution - if (placer_opts.place_checkpointing) - restore_best_placement(placer_state, - placement_checkpoint, timing_info, costs, - placer_criticalities, placer_setup_slacks, place_delay_model, - pin_timing_invalidator, crit_params, noc_cost_handler); - - if (placer_opts.placement_saves_per_temperature >= 1) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - 
annealing_state.num_temps + 1, 0); - VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); - } - - - //#ifdef VERBOSE - // if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { - // print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT)); - // } - //#endif - - // Update physical pin values - for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { - blk_loc_registry.place_sync_external_block_connections(block_id); - } - - check_place(costs, - place_delay_model.get(), - placer_criticalities.get(), - placer_opts.place_algorithm, - noc_opts, - placer_state, - net_cost_handler, - noc_cost_handler); - //Some stats VTR_LOG("\n"); VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); @@ -339,23 +184,19 @@ void try_place(const Netlist<>& net_list, critical_path = timing_info->least_slack_critical_path(); if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo( - getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc, timing_info->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode( - analysis_opts.echo_dot_timing_graph_node); - write_setup_timing_graph_dot( - getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) - + std::string(".dot"), - *timing_info, debug_tnode); + tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placement_delay_calc, timing_info->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *timing_info, debug_tnode); } generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info, *placement_delay_calc, is_flat, blk_loc_registry); - /* Print critical path delay metrics 
*/ + // Print critical path delay metrics VTR_LOG("\n"); print_setup_timing_summary(*timing_ctx.constraints, *timing_info->setup_analyzer(), "Placement estimated ", ""); @@ -380,28 +221,9 @@ void try_place(const Netlist<>& net_list, } update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); - // Print out swap statistics - print_resources_utilization(blk_loc_registry); - - print_placement_swaps_stats(annealing_state, swap_stats); - - move_type_stats.print_placement_move_types_stats(); - - if (noc_opts.noc) { - write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); - } free_placement_structs(); - print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); - print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); - - VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", - p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, - p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_total_elapsed_sec); - copy_locs_to_global_state(blk_loc_registry); } @@ -505,119 +327,6 @@ static void update_screen_debug() { } #endif -static void print_place_status_header(bool noc_enabled) { - if (!noc_enabled) { - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); - VTR_LOG( - "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); - VTR_LOG( - " (sec) (ns) (ns) (ns) \n"); - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); - } else { - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); - VTR_LOG( - "Tnum 
Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha Agg. BW Agg. Lat Lat Over. NoC Cong.\n"); - VTR_LOG( - " (sec) (ns) (ns) (ns) (bps) (ns) (ns) \n"); - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); - } -} - -static void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms) { - VTR_LOG( - "%4zu %6.1f %7.1e " - "%7.3f %10.2f %-10.5g " - "%7.3f % 10.3g % 8.3f " - "%7.3f %7.4f %6.1f %8.2f", - state.num_temps, elapsed_sec, state.t, - stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, - 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, - stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent); - - pretty_print_uint(" ", tot_moves, 9, 3); - - VTR_LOG(" %6.3f", state.alpha); - - if (noc_enabled) { - VTR_LOG( - " %7.2e %7.2e" - " %8.2e %8.2f", - noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency, - noc_cost_terms.latency_overrun, noc_cost_terms.congestion); - } - - VTR_LOG("\n"); - fflush(stdout); -} - -static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& device_ctx = g_vpr_ctx.device(); - const auto& block_locs = blk_loc_registry.block_locs(); - - size_t max_block_name = 0; - size_t max_tile_name = 0; - - //Record the resource requirement - std::map num_type_instances; - std::map> num_placed_instances; - - for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - const t_pl_loc& loc = block_locs[blk_id].loc; - - t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); - t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - - 
num_type_instances[logical_block]++; - num_placed_instances[logical_block][physical_tile]++; - - max_block_name = std::max(max_block_name, logical_block->name.length()); - max_tile_name = std::max(max_tile_name, physical_tile->name.length()); - } - - VTR_LOG("\n"); - VTR_LOG("Placement resource usage:\n"); - for (const auto [logical_block_type_ptr, _] : num_type_instances) { - for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) { - VTR_LOG(" %-*s implemented as %-*s: %d\n", max_block_name, - logical_block_type_ptr->name.c_str(), max_tile_name, - physical_tile_type_ptr->name.c_str(), num_instances); - } - } - VTR_LOG("\n"); -} - -static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) { - size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; - VTR_ASSERT(total_swap_attempts > 0); - - size_t num_swap_print_digits = ceil(log10(total_swap_attempts)); - float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; - float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; - float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; - VTR_LOG("Placement number of temperatures: %d\n", state.num_temps); - VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, - total_swap_attempts); - VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_accepted, 100 * accept_rate); - VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_rejected, 100 * reject_rate); - VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_aborted, 100 * abort_rate); -} - static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { auto& place_ctx = g_vpr_ctx.mutable_placement(); diff --git a/vpr/src/place/place_checkpoint.cpp 
b/vpr/src/place/place_checkpoint.cpp index 85f4ab28e18..60b009d85ae 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -42,7 +42,7 @@ void restore_best_placement(PlacerState& placer_state, t_placer_costs& costs, std::unique_ptr& placer_criticalities, std::unique_ptr& placer_setup_slacks, - std::unique_ptr& place_delay_model, + std::shared_ptr& place_delay_model, std::unique_ptr& pin_timing_invalidator, PlaceCritParams crit_params, std::optional& noc_cost_handler) { diff --git a/vpr/src/place/place_checkpoint.h b/vpr/src/place/place_checkpoint.h index 8c2313e7117..9a3fe76d5d8 100644 --- a/vpr/src/place/place_checkpoint.h +++ b/vpr/src/place/place_checkpoint.h @@ -74,7 +74,7 @@ void restore_best_placement(PlacerState& placer_state, t_placer_costs& costs, std::unique_ptr& placer_criticalities, std::unique_ptr& placer_setup_slacks, - std::unique_ptr& place_delay_model, + std::shared_ptr& place_delay_model, std::unique_ptr& pin_timing_invalidator, PlaceCritParams crit_params, std::optional& noc_cost_handler); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index d0b59b7a17a..03d861055c2 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -9,6 +9,10 @@ #include "tatum/echo_writer.hpp" #include "verify_placement.h" #include "place_timing_update.h" +#include "annealer.h" +#include "RL_agent_util.h" +#include "place_log_util.h" +#include "place_checkpoint.h" Placer::Placer(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -278,4 +282,156 @@ void Placer::print_initial_placement_stats_() { blk_loc_registry.place_macros().macros().size(), num_macro_members, float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); VTR_LOG("\n"); -} \ No newline at end of file +} + +void Placer::place() { + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& p_runtime_ctx = placer_state_.runtime(); + + bool skip_anneal = false; 
+#ifdef ENABLE_ANALYTIC_PLACE + // When enabled, skip most of the annealing and go straight to quench + if (placer_opts_.enable_analytic_placer) { + skip_anneal = true; + } +#endif + + float sTNS = NAN; + float sWNS = NAN; + + const t_annealing_state& annealing_state = annealer_.get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer_.get_stats(); + + if (!skip_anneal) { + //Table header + print_place_status_header(noc_opts_.noc); + + // Outer loop of the simulated annealing begins + do { + vtr::Timer temperature_timer; + + annealer_.outer_loop_update_timing_info(); + + if (placer_opts_.place_algorithm.is_timing_driven()) { + critical_path_ = timing_info_->least_slack_critical_path(); + sTNS = timing_info_->setup_total_negative_slack(); + sWNS = timing_info_->setup_worst_negative_slack(); + + // see if we should save the current placement solution as a checkpoint + if (placer_opts_.place_checkpointing && annealer_.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { + save_placement_checkpoint_if_needed(placer_state_.mutable_block_locs(), + placement_checkpoint_, + timing_info_, costs_, critical_path_.delay()); + } + } + + // do a complete inner loop iteration + annealer_.placement_inner_loop(); + + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(), + noc_opts_.noc, costs_.noc_cost_terms); + +// sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", +// costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t); +// +// update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info_); + + //#ifdef VERBOSE + // if (getEchoEnabled()) { + // print_clb_placement("first_iteration_clb_placement.echo"); + // } + //#endif + + // Outer loop of the simulated annealing ends + } while (annealer_.outer_loop_update_state()); + } //skip_anneal ends + + // Start Quench + annealer_.start_quench(); + + auto 
pre_quench_timing_stats = timing_ctx.stats; + { // Quench + vtr::ScopedFinishTimer temperature_timer("Placement Quench"); + + annealer_.outer_loop_update_timing_info(); + + /* Run inner loop again with temperature = 0 so as to accept only swaps + * which reduce the cost of the placement */ + annealer_.placement_inner_loop(); + + if (placer_opts_.place_quench_algorithm.is_timing_driven()) { + critical_path_ = timing_info_->least_slack_critical_path(); + sTNS = timing_info_->setup_total_negative_slack(); + sWNS = timing_info_->setup_worst_negative_slack(); + } + + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(), + noc_opts_.noc, costs_.noc_cost_terms); + } + auto post_quench_timing_stats = timing_ctx.stats; + + // Final timing analysis + PlaceCritParams crit_params; + crit_params.crit_exponent = annealing_state.crit_exponent; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + if (placer_opts_.place_algorithm.is_timing_driven()) { + perform_full_timing_update(crit_params, place_delay_model_.get(), placer_criticalities_.get(), + placer_setup_slacks_.get(), pin_timing_invalidator_.get(), + timing_info_.get(), &costs_, placer_state_); + VTR_LOG("post-quench CPD = %g (ns) \n", + 1e9 * timing_info_->least_slack_critical_path().delay()); + } + + // See if our latest checkpoint is better than the current placement solution + if (placer_opts_.place_checkpointing) { + restore_best_placement(placer_state_, + placement_checkpoint_, timing_info_, costs_, + placer_criticalities_, placer_setup_slacks_, place_delay_model_, + pin_timing_invalidator_, crit_params, noc_cost_handler_); + } + + if (placer_opts_.placement_saves_per_temperature >= 1) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", + annealing_state.num_temps + 1, 0); + VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), 
placer_state_.mutable_block_locs()); + } + + //#ifdef VERBOSE + // if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { + // print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT)); + // } + //#endif + + // Update physical pin values + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + placer_state_.mutable_blk_loc_registry().place_sync_external_block_connections(block_id); + } + + check_place_(); + + + // Print out swap statistics + print_resources_utilization(placer_state_.blk_loc_registry()); + + print_placement_swaps_stats(annealing_state, swap_stats); + + move_type_stats.print_placement_move_types_stats(); + + if (noc_opts_.noc) { + write_noc_placement_file(noc_opts_.noc_placement_file_name, placer_state_.block_locs()); + } + + print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); + print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); + + VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", + p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, + p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_total_elapsed_sec); +} diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 57ebce2a8a2..a12fa65758b 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -24,6 +24,7 @@ class Placer { std::shared_ptr place_delay_model, bool cube_bb); + void place(); //TODO: make this private public: From da5ebf6e3914a564db8b645367668bce19ebf966 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 12:48:04 -0500 Subject: [PATCH 05/32] add print_post_placement_stats_() to Placer --- vpr/src/place/place.cpp | 123 +-------------------------- vpr/src/place/place_log_util.cpp | 25 ++++++ vpr/src/place/place_log_util.h | 12 +++ vpr/src/place/placer.cpp | 140 +++++++++++++++++++++++++------ vpr/src/place/placer.h | 12 +++ 5 
files changed, 166 insertions(+), 146 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 8344bfd5ff9..145a73d483a 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -73,19 +73,6 @@ static void free_placement_structs(); static int count_connections(); -static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const PlacementDelayCalculator& delay_calc, - bool is_flat, - const BlkLocRegistry& blk_loc_registry); - -/** - * @brief Copies the placement location variables into the global placement context. - * @param blk_loc_registry The placement location variables to be copied. - */ -static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry); - /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -108,13 +95,9 @@ void try_place(const Netlist<>& net_list, */ VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); - const auto& atom_ctx = g_vpr_ctx.atom(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - char msg[vtr::bufsize]; - /* Placement delay model is independent of the placement and can be shared across * multiple placers. So, it is created and initialized once. */ std::shared_ptr place_delay_model; @@ -141,8 +124,6 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (cube_bb ? 
"Cube" : "Per-layer")); VTR_LOG("\n"); - int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); - auto& place_ctx = g_vpr_ctx.mutable_placement(); place_ctx.lock_loc_vars(); place_ctx.compressed_block_grids = create_compressed_block_grids(); @@ -158,73 +139,11 @@ void try_place(const Netlist<>& net_list, const int width_fac = placer_opts.place_chan_width; init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry()); - sprintf(msg, - "Initial Placement. Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", - costs.cost, costs.bb_cost, costs.timing_cost, width_fac); - - // Draw the initial placement - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); - - if (placer_opts.placement_saves_per_temperature >= 1) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, - 0); - VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); - } - - //Some stats - VTR_LOG("\n"); - VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - blocks_affected.move_abortion_logger.report_aborted_moves(); - - if (placer_opts.place_algorithm.is_timing_driven()) { - //Final timing estimate - VTR_ASSERT(timing_info); - - critical_path = timing_info->least_slack_critical_path(); - - if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc, timing_info->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); - write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), - *timing_info, debug_tnode); - } - - generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info, - *placement_delay_calc, is_flat, blk_loc_registry); - - // Print critical 
path delay metrics - VTR_LOG("\n"); - print_setup_timing_summary(*timing_ctx.constraints, - *timing_info->setup_analyzer(), "Placement estimated ", ""); - } - - sprintf(msg, - "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", - costs.cost, costs.bb_cost, costs.timing_cost, width_fac); - VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs.cost, - costs.bb_cost, costs.timing_cost); - // print the noc costs info - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - noc_cost_handler->print_noc_costs("\nNoC Placement Costs", costs, noc_opts); - -#ifdef ENABLE_NOC_SAT_ROUTING - if (costs.noc_cost_terms.congestion > 0.0) { - VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); - invoke_sat_router(costs, noc_opts, placer_opts.seed); - } -#endif //ENABLE_NOC_SAT_ROUTING - } - - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); + placer.place(); free_placement_structs(); - copy_locs_to_global_state(blk_loc_registry); + placer.copy_locs_to_global_state(); } /*only count non-global connections */ @@ -269,7 +188,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, return cube_bb; } -/* Frees the major structures needed by the placer (and not needed * +/* Frees the major structures needed by the placer (and not needed * elsewhere). 
*/ static void free_placement_structs() { auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -295,27 +214,6 @@ void print_clb_placement(const char* fname) { } #endif -static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const PlacementDelayCalculator& delay_calc, - bool is_flat, - const BlkLocRegistry& blk_loc_registry) { - const auto& timing_ctx = g_vpr_ctx.timing(); - const auto& atom_ctx = g_vpr_ctx.atom(); - - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, - delay_calc, is_flat, blk_loc_registry); - resolver.set_detail_level(analysis_opts.timing_report_detail); - - tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, - *timing_ctx.constraints); - - timing_reporter.report_timing_setup( - placer_opts.post_place_timing_report_file, - *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); -} - #if 0 static void update_screen_debug(); @@ -327,18 +225,3 @@ static void update_screen_debug() { } #endif -static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - // the placement location variables should be unlocked before being accessed - place_ctx.unlock_loc_vars(); - - // copy the local location variables into the global state - auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry(); - global_blk_loc_registry = blk_loc_registry; - -#ifndef NO_GRAPHICS - // update the graphics' reference to placement location variables - get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); -#endif -} \ No newline at end of file diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index d935f474d0a..62fd3e57ca1 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -3,6 +3,10 @@ #include "vtr_log.h" #include "annealer.h" #include 
"place_util.h" +#include "PostClusterDelayCalculator.h" +#include "tatum/TimingReporter.hpp" +#include "VprTimingGraphResolver.h" +#include "timing_info.h" void print_place_status_header(bool noc_enabled) { VTR_LOG("\n"); @@ -116,4 +120,25 @@ void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_st swap_stats.num_swap_rejected, 100 * reject_rate); VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, swap_stats.num_swap_aborted, 100 * abort_rate); +} + +void generate_post_place_timing_reports(const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const PlacementDelayCalculator& delay_calc, + bool is_flat, + const BlkLocRegistry& blk_loc_registry) { + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& atom_ctx = g_vpr_ctx.atom(); + + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, + delay_calc, is_flat, blk_loc_registry); + resolver.set_detail_level(analysis_opts.timing_report_detail); + + tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, + *timing_ctx.constraints); + + timing_reporter.report_timing_setup( + placer_opts.post_place_timing_report_file, + *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); } \ No newline at end of file diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index 22a2bbd9b03..7e8567a97a0 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -4,8 +4,13 @@ #include +#include "timing_info_fwd.h" +#include "PlacementDelayCalculator.h" + class t_annealing_state; class t_placer_statistics; +struct t_placer_opts; +struct t_analysis_opts; struct NocCostTerms; struct t_swap_stats; class BlkLocRegistry; @@ -26,4 +31,11 @@ void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); +void 
generate_post_place_timing_reports(const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const PlacementDelayCalculator& delay_calc, + bool is_flat, + const BlkLocRegistry& blk_loc_registry); + #endif //VTR_PLACE_LOG_UTIL_H diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 03d861055c2..f17f56c8b47 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -1,7 +1,10 @@ #include "placer.h" +#include + #include "vtr_time.h" +#include "draw.h" #include "read_place.h" #include "analytic_placer.h" #include "initial_placement.h" @@ -22,12 +25,13 @@ Placer::Placer(const Netlist<>& net_list, std::shared_ptr place_delay_model, bool cube_bb) : placer_opts_(placer_opts) + , analysis_opts_(analysis_opts) , noc_opts_(noc_opts) , costs_(placer_opts.place_algorithm, noc_opts.noc) , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb) , rng_(placer_opts.seed) , net_cost_handler_(placer_opts, placer_state_, cube_bb) - , place_delay_model_(place_delay_model){ + , place_delay_model_(std::move(place_delay_model)){ const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); @@ -59,8 +63,9 @@ Placer::Placer(const Netlist<>& net_list, initial_placement(placer_opts, placer_opts.constraints_file.c_str(), noc_opts, blk_loc_registry, noc_cost_handler_, rng_); + const int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); //create the move generator based on the chosen placement strategy -// auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_); + auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_); if (!placer_opts.write_initial_place_file.empty()) { print_place(nullptr, nullptr, 
placer_opts.write_initial_place_file.c_str(), placer_state_.block_locs()); @@ -117,6 +122,16 @@ Placer::Placer(const Netlist<>& net_list, // set the starting total placement cost costs_.cost = costs_.get_total_cost(placer_opts, noc_opts); + + // Sanity check that initial placement is legal + check_place_(); + + print_initial_placement_stats_(); + + annealer_ = std::make_unique(placer_opts_, placer_state_, costs_, net_cost_handler_, noc_cost_handler_, + noc_opts_, rng_, std::move(move_generator), std::move(move_generator2), place_delay_model_.get(), + placer_criticalities_.get(), placer_setup_slacks_.get(), timing_info_.get(), pin_timing_invalidator_.get(), + move_lim); } void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, @@ -178,18 +193,6 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, } costs_.timing_cost_norm = 1 / costs_.timing_cost; - - // Sanity check that initial placement is legal - check_place_(); - - print_initial_placement_stats_(); - -#ifndef ENABLE_ANALYTIC_PLACE - annealer_ = std::make_unique(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, - noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), - move_lim); -#endif } void Placer::check_place_() { @@ -282,6 +285,20 @@ void Placer::print_initial_placement_stats_() { blk_loc_registry.place_macros().macros().size(), num_macro_members, float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); VTR_LOG("\n"); + + char msg[vtr::bufsize]; + sprintf(msg, + "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", + costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width); + + // Draw the initial placement + update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_); + + if (placer_opts_.placement_saves_per_temperature >= 1) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); + VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); + } } void Placer::place() { @@ -300,8 +317,8 @@ void Placer::place() { float sTNS = NAN; float sWNS = NAN; - const t_annealing_state& annealing_state = annealer_.get_annealing_state(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer_.get_stats(); + const t_annealing_state& annealing_state = annealer_->get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats(); if (!skip_anneal) { //Table header @@ -311,7 +328,7 @@ void Placer::place() { do { vtr::Timer temperature_timer; - annealer_.outer_loop_update_timing_info(); + annealer_->outer_loop_update_timing_info(); if (placer_opts_.place_algorithm.is_timing_driven()) { critical_path_ = timing_info_->least_slack_critical_path(); @@ -319,7 +336,7 @@ void Placer::place() { sWNS = timing_info_->setup_worst_negative_slack(); // see if we should save the current placement solution as a checkpoint - if (placer_opts_.place_checkpointing && annealer_.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { + if (placer_opts_.place_checkpointing && annealer_->get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { save_placement_checkpoint_if_needed(placer_state_.mutable_block_locs(), placement_checkpoint_, timing_info_, costs_, critical_path_.delay()); @@ -327,10 +344,10 @@ void Placer::place() { } // do a complete inner loop iteration - annealer_.placement_inner_loop(); + annealer_->placement_inner_loop(); 
print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(), + critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(), noc_opts_.noc, costs_.noc_cost_terms); // sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", @@ -345,21 +362,21 @@ void Placer::place() { //#endif // Outer loop of the simulated annealing ends - } while (annealer_.outer_loop_update_state()); + } while (annealer_->outer_loop_update_state()); } //skip_anneal ends // Start Quench - annealer_.start_quench(); + annealer_->start_quench(); auto pre_quench_timing_stats = timing_ctx.stats; { // Quench vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - annealer_.outer_loop_update_timing_info(); + annealer_->outer_loop_update_timing_info(); /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ - annealer_.placement_inner_loop(); + annealer_->placement_inner_loop(); if (placer_opts_.place_quench_algorithm.is_timing_driven()) { critical_path_ = timing_info_->least_slack_critical_path(); @@ -368,7 +385,7 @@ void Placer::place() { } print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path_.delay(), sTNS, sWNS, annealer_.get_total_iteration(), + critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(), noc_opts_.noc, costs_.noc_cost_terms); } auto post_quench_timing_stats = timing_ctx.stats; @@ -427,7 +444,7 @@ void Placer::place() { } print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); - print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); +// print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, @@ -435,3 +452,74 @@ void Placer::place() { 
p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, p_runtime_ctx.f_update_td_costs_total_elapsed_sec); } + +void Placer::print_post_placement_stats_() { + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats(); + + VTR_LOG("\n"); + VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); +// blocks_affected.move_abortion_logger.report_aborted_moves(); + + if (placer_opts_.place_algorithm.is_timing_driven()) { + //Final timing estimate + VTR_ASSERT(timing_info_); + + critical_path_ = timing_info_->least_slack_critical_path(); + + if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { + tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placement_delay_calc_, timing_info_->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts_.echo_dot_timing_graph_node); + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *timing_info_, debug_tnode); + } + + generate_post_place_timing_reports(placer_opts_, analysis_opts_, *timing_info_, + *placement_delay_calc_, /*is_flat=*/false, placer_state_.blk_loc_registry()); + + // Print critical path delay metrics + VTR_LOG("\n"); + print_setup_timing_summary(*timing_ctx.constraints, + *timing_info_->setup_analyzer(), "Placement estimated ", ""); + } + + char msg[vtr::bufsize]; + sprintf(msg, + "Placement. 
Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", + costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width); + VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs_.cost, + costs_.bb_cost, costs_.timing_cost); + update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_); + + // print the noc costs info + if (noc_opts_.noc) { + VTR_ASSERT(noc_cost_handler_.has_value()); + noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", costs_, noc_opts_); + +#ifdef ENABLE_NOC_SAT_ROUTING + if (costs.noc_cost_terms.congestion > 0.0) { + VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); + invoke_sat_router(costs, noc_opts, placer_opts.seed); + } +#endif //ENABLE_NOC_SAT_ROUTING + } +} + +void Placer::copy_locs_to_global_state() { + auto& place_ctx = g_vpr_ctx.mutable_placement(); + + // the placement location variables should be unlocked before being accessed + place_ctx.unlock_loc_vars(); + + // copy the local location variables into the global state + auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry(); + global_blk_loc_registry = placer_state_.blk_loc_registry(); + +#ifndef NO_GRAPHICS + // update the graphics' reference to placement location variables + get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); +#endif +} diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index a12fa65758b..c24c045b3ca 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -13,6 +13,10 @@ #include "noc_place_utils.h" #include "net_cost_handler.h" +class PlacementAnnealer; +namespace vtr{ +class ScopedStartFinishTimer; +} class Placer { public: @@ -26,9 +30,15 @@ class Placer { void place(); + /** + * @brief Copies the placement location variables into the global placement context. 
+ */ + void copy_locs_to_global_state(); + //TODO: make this private public: const t_placer_opts& placer_opts_; + const t_analysis_opts& analysis_opts_; const t_noc_opts& noc_opts_; t_placer_costs costs_; PlacerState placer_state_; @@ -70,6 +80,8 @@ class Placer { int check_placement_costs_(); void print_initial_placement_stats_(); + + void print_post_placement_stats_(); }; #endif //VTR_PLACER_H From d1d5e7fb98615d041f75bcc6aa736d80e7aba49f Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 12:51:33 -0500 Subject: [PATCH 06/32] removed unused inclusions from place.cpp --- vpr/src/place/place.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 145a73d483a..4030c04e216 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1,58 +1,26 @@ -#include -#include #include -#include -#include -#include "NetPinTimingInvalidator.h" -#include "clustered_netlist.h" -#include "device_grid.h" -#include "verify_placement.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_util.h" #include "vtr_time.h" -#include "vtr_math.h" - #include "vpr_types.h" -#include "vpr_error.h" #include "vpr_utils.h" #include "globals.h" #include "place.h" #include "annealer.h" -#include "read_place.h" #include "draw.h" -#include "timing_place.h" #include "read_xml_arch_file.h" #include "echo_files.h" #include "histogram.h" -#include "place_util.h" -#include "analytic_placer.h" -#include "initial_placement.h" #include "place_delay_model.h" -#include "place_timing_update.h" -#include "move_transactions.h" #include "move_utils.h" #include "buttons.h" -#include "PlacementDelayCalculator.h" #include "VprTimingGraphResolver.h" -#include "timing_util.h" -#include "timing_info.h" -#include "concrete_timing_info.h" -#include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" #include "RL_agent_util.h" -#include "place_checkpoint.h" - -#include 
"clustered_netlist_utils.h" - -#include "noc_place_utils.h" - -#include "net_cost_handler.h" -#include "placer_state.h" #include "placer.h" /********************* Static subroutines local to place.c *******************/ From a3bb7628b774342d66c709aa247f2d166f6c2891 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 13:20:07 -0500 Subject: [PATCH 07/32] add PlacementLogPrinter class --- vpr/src/place/place.cpp | 23 ++------------ vpr/src/place/place_log_util.cpp | 51 ++++++++++++++++++++------------ vpr/src/place/place_log_util.h | 24 +++++++-------- vpr/src/place/placer.cpp | 22 ++++++-------- vpr/src/place/placer.h | 3 ++ 5 files changed, 57 insertions(+), 66 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 4030c04e216..fa9f007dbb9 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -39,8 +39,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, static void free_placement_structs(); -static int count_connections(); - /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -63,8 +61,8 @@ void try_place(const Netlist<>& net_list, */ VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); - const auto& timing_ctx = g_vpr_ctx.timing(); - auto pre_place_timing_stats = timing_ctx.stats; +// const auto& timing_ctx = g_vpr_ctx.timing(); +// auto pre_place_timing_stats = timing_ctx.stats; /* Placement delay model is independent of the placement and can be shared across * multiple placers. So, it is created and initialized once. 
*/ @@ -114,23 +112,6 @@ void try_place(const Netlist<>& net_list, placer.copy_locs_to_global_state(); } -/*only count non-global connections */ -static int count_connections() { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - int count = 0; - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - continue; - } - - count += cluster_ctx.clb_nlist.net_sinks(net_id).size(); - } - - return count; -} - static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, const RRGraphView& rr_graph) { bool cube_bb; diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 62fd3e57ca1..4f82ef6e442 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -7,8 +7,14 @@ #include "tatum/TimingReporter.hpp" #include "VprTimingGraphResolver.h" #include "timing_info.h" +#include "placer.h" + +PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) + : placer_(placer) {} + +void PlacementLogPrinter::print_place_status_header() const { + const bool noc_enabled = placer_.noc_opts_.noc; -void print_place_status_header(bool noc_enabled) { VTR_LOG("\n"); if (!noc_enabled) { VTR_LOG( @@ -31,28 +37,31 @@ void print_place_status_header(bool noc_enabled) { } } -void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms) { +void PlacementLogPrinter::print_place_status(float elapsed_sec) const { + const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); + const int tot_moves = placer_.annealer_->get_total_iteration(); + const bool noc_enabled = placer_.noc_opts_.noc; + const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms; + + const bool is_timing_driven = 
placer_.placer_opts_.place_algorithm.is_timing_driven(); + const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits::quiet_NaN(); + const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits::quiet_NaN(); + const float sWNS = is_timing_driven ? placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits::quiet_NaN(); + VTR_LOG( "%4zu %6.1f %7.1e " "%7.3f %10.2f %-10.5g " "%7.3f % 10.3g % 8.3f " "%7.3f %7.4f %6.1f %8.2f", - state.num_temps, elapsed_sec, state.t, - stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, + annealing_state.num_temps, elapsed_sec, annealing_state.t, + placer_stats.av_cost, placer_stats.av_bb_cost, placer_stats.av_timing_cost, 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, - stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent); + placer_stats.success_rate, placer_stats.std_dev, annealing_state.rlim, annealing_state.crit_exponent); pretty_print_uint(" ", tot_moves, 9, 3); - VTR_LOG(" %6.3f", state.alpha); + VTR_LOG(" %6.3f", annealing_state.alpha); if (noc_enabled) { VTR_LOG( @@ -66,10 +75,10 @@ void print_place_status(const t_annealing_state& state, fflush(stdout); } -void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { +void PlacementLogPrinter::print_resources_utilization() const { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); - const auto& block_locs = blk_loc_registry.block_locs(); + const auto& block_locs = placer_.placer_state_.block_locs(); size_t max_block_name = 0; size_t max_tile_name = 0; @@ -103,7 +112,10 @@ void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { VTR_LOG("\n"); } -void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) { +void PlacementLogPrinter::print_placement_swaps_stats() const { + const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); + const 
t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state(); + size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; VTR_ASSERT(total_swap_attempts > 0); @@ -111,7 +123,7 @@ void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_st float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; - VTR_LOG("Placement number of temperatures: %d\n", state.num_temps); + VTR_LOG("Placement number of temperatures: %d\n", annealing_state.num_temps); VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, total_swap_attempts); VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, @@ -141,4 +153,5 @@ void generate_post_place_timing_reports(const t_placer_opts& placer_opts, timing_reporter.report_timing_setup( placer_opts.post_place_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); -} \ No newline at end of file +} + diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index 7e8567a97a0..f6bb64b8f9d 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -14,22 +14,20 @@ struct t_analysis_opts; struct NocCostTerms; struct t_swap_stats; class BlkLocRegistry; +class Placer; -void print_place_status_header(bool noc_enabled); +class PlacementLogPrinter { + public: + explicit PlacementLogPrinter(const Placer& placer); -void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms); + void print_place_status_header() const; + void print_resources_utilization() const; + void print_placement_swaps_stats() const; + void 
print_place_status(float elapsed_sec) const; -void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); - -void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); + private: + const Placer& placer_; +}; void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index f17f56c8b47..76aa214ded3 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -14,7 +14,6 @@ #include "place_timing_update.h" #include "annealer.h" #include "RL_agent_util.h" -#include "place_log_util.h" #include "place_checkpoint.h" Placer::Placer(const Netlist<>& net_list, @@ -31,7 +30,8 @@ Placer::Placer(const Netlist<>& net_list, , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb) , rng_(placer_opts.seed) , net_cost_handler_(placer_opts, placer_state_, cube_bb) - , place_delay_model_(std::move(place_delay_model)){ + , place_delay_model_(std::move(place_delay_model)) + , log_printer_(*this) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); @@ -322,7 +322,7 @@ void Placer::place() { if (!skip_anneal) { //Table header - print_place_status_header(noc_opts_.noc); + log_printer_.print_place_status_header(); // Outer loop of the simulated annealing begins do { @@ -346,9 +346,7 @@ void Placer::place() { // do a complete inner loop iteration annealer_->placement_inner_loop(); - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(), - noc_opts_.noc, costs_.noc_cost_terms); + log_printer_.print_place_status(temperature_timer.elapsed_sec()); // sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", // costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t); @@ -384,9 +382,7 @@ void 
Placer::place() { sWNS = timing_info_->setup_worst_negative_slack(); } - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path_.delay(), sTNS, sWNS, annealer_->get_total_iteration(), - noc_opts_.noc, costs_.noc_cost_terms); + log_printer_.print_place_status(temperature_timer.elapsed_sec()); } auto post_quench_timing_stats = timing_ctx.stats; @@ -431,11 +427,11 @@ void Placer::place() { check_place_(); + print_post_placement_stats_(); - // Print out swap statistics - print_resources_utilization(placer_state_.blk_loc_registry()); - - print_placement_swaps_stats(annealing_state, swap_stats); + // Print out swap statistics and resource utilization + log_printer_.print_resources_utilization(); + log_printer_.print_placement_swaps_stats(); move_type_stats.print_placement_move_types_stats(); diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index c24c045b3ca..96010151149 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -12,6 +12,7 @@ #include "placer_state.h" #include "noc_place_utils.h" #include "net_cost_handler.h" +#include "place_log_util.h" class PlacementAnnealer; namespace vtr{ @@ -64,6 +65,8 @@ class Placer { std::unique_ptr annealer_; + PlacementLogPrinter log_printer_; + private: void alloc_and_init_timing_objects_(const Netlist<>& net_list, const t_analysis_opts& analysis_opts); From 1e10c27565fbab84b6c0e05c3a991fe967bce638 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 13:22:21 -0500 Subject: [PATCH 08/32] remove unused sTNS and sWNS in Placer --- vpr/src/place/placer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 76aa214ded3..9b6a6f62c61 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -314,9 +314,6 @@ void Placer::place() { } #endif - float sTNS = NAN; - float sWNS = NAN; - const t_annealing_state& annealing_state = annealer_->get_annealing_state(); const auto& 
[swap_stats, move_type_stats, placer_stats] = annealer_->get_stats(); @@ -332,8 +329,6 @@ void Placer::place() { if (placer_opts_.place_algorithm.is_timing_driven()) { critical_path_ = timing_info_->least_slack_critical_path(); - sTNS = timing_info_->setup_total_negative_slack(); - sWNS = timing_info_->setup_worst_negative_slack(); // see if we should save the current placement solution as a checkpoint if (placer_opts_.place_checkpointing && annealer_->get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { @@ -378,8 +373,6 @@ void Placer::place() { if (placer_opts_.place_quench_algorithm.is_timing_driven()) { critical_path_ = timing_info_->least_slack_critical_path(); - sTNS = timing_info_->setup_total_negative_slack(); - sWNS = timing_info_->setup_worst_negative_slack(); } log_printer_.print_place_status(temperature_timer.elapsed_sec()); From 34deb5c05024a44a4884552a876400b429e95aed Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 15:10:32 -0500 Subject: [PATCH 09/32] make member variables of Placer private --- vpr/src/place/place.cpp | 6 +++--- vpr/src/place/place_log_util.cpp | 29 ++++++++++++++++------------- vpr/src/place/placer.cpp | 29 +++++++++++++++++++++++++++++ vpr/src/place/placer.h | 21 ++++++++++++++++++++- 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index fa9f007dbb9..ba86e91020b 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -97,13 +97,13 @@ void try_place(const Netlist<>& net_list, Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb); #ifndef NO_GRAPHICS - if (placer.noc_cost_handler_.has_value()) { - get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler_->get_link_bandwidth_usages()); + if (placer.noc_cost_handler().has_value()) { + get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler()->get_link_bandwidth_usages()); } #endif const 
int width_fac = placer_opts.place_chan_width; - init_draw_coords((float)width_fac, placer.placer_state_.blk_loc_registry()); + init_draw_coords((float)width_fac, placer.placer_state().blk_loc_registry()); placer.place(); diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 4f82ef6e442..4a5e90a2c42 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -13,7 +13,7 @@ PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) : placer_(placer) {} void PlacementLogPrinter::print_place_status_header() const { - const bool noc_enabled = placer_.noc_opts_.noc; + const bool noc_enabled = placer_.noc_opts().noc; VTR_LOG("\n"); if (!noc_enabled) { @@ -38,16 +38,18 @@ void PlacementLogPrinter::print_place_status_header() const { } void PlacementLogPrinter::print_place_status(float elapsed_sec) const { - const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state(); - const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); - const int tot_moves = placer_.annealer_->get_total_iteration(); - const bool noc_enabled = placer_.noc_opts_.noc; - const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms; + const PlacementAnnealer& annealer = placer_.annealer(); + const t_annealing_state& annealing_state = annealer.get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + const int tot_moves = annealer.get_total_iteration(); - const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven(); - const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits::quiet_NaN(); - const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits::quiet_NaN(); - const float sWNS = is_timing_driven ? 
placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits::quiet_NaN(); + const bool noc_enabled = placer_.noc_opts().noc; + const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms; + + const bool is_timing_driven = placer_.placer_opts().place_algorithm.is_timing_driven(); + const float cpd = is_timing_driven ? placer_.critical_path().delay() : std::numeric_limits::quiet_NaN(); + const float sTNS = is_timing_driven ? placer_.timing_info()->setup_total_negative_slack() : std::numeric_limits::quiet_NaN(); + const float sWNS = is_timing_driven ? placer_.timing_info()->setup_worst_negative_slack() : std::numeric_limits::quiet_NaN(); VTR_LOG( "%4zu %6.1f %7.1e " @@ -78,7 +80,7 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const { void PlacementLogPrinter::print_resources_utilization() const { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); - const auto& block_locs = placer_.placer_state_.block_locs(); + const auto& block_locs = placer_.placer_state().block_locs(); size_t max_block_name = 0; size_t max_tile_name = 0; @@ -113,8 +115,9 @@ void PlacementLogPrinter::print_resources_utilization() const { } void PlacementLogPrinter::print_placement_swaps_stats() const { - const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); - const t_annealing_state& annealing_state = placer_.annealer_->get_annealing_state(); + const PlacementAnnealer& annealer = placer_.annealer(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + const t_annealing_state& annealing_state = annealer.get_annealing_state(); size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; VTR_ASSERT(total_swap_attempts > 0); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 9b6a6f62c61..ec78ce47fa8 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -512,3 
+512,32 @@ void Placer::copy_locs_to_global_state() { get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); #endif } + +const PlacementAnnealer& Placer::annealer() const { + return *annealer_; +} + +const t_placer_opts& Placer::placer_opts() const { + return placer_opts_; +} + +const t_noc_opts& Placer::noc_opts() const { + return noc_opts_; +} + +const t_placer_costs& Placer::costs() const { + return costs_; +} + +const tatum::TimingPathInfo& Placer::critical_path() const { + return critical_path_; +} +std::shared_ptr Placer::timing_info() const { + return timing_info_; +} +const PlacerState& Placer::placer_state() const { + return placer_state_; +} +const std::optional& Placer::noc_cost_handler() const { + return noc_cost_handler_; +} diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 96010151149..ec0aaaeb44c 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -36,8 +36,27 @@ class Placer { */ void copy_locs_to_global_state(); + /* + * Getters + */ + const PlacementAnnealer& annealer() const; + + const t_placer_opts& placer_opts() const; + + const t_noc_opts& noc_opts() const; + + const t_placer_costs& costs() const; + + const tatum::TimingPathInfo& critical_path() const; + + std::shared_ptr timing_info() const; + + const PlacerState& placer_state() const; + + const std::optional& noc_cost_handler() const; + //TODO: make this private - public: + private: const t_placer_opts& placer_opts_; const t_analysis_opts& analysis_opts_; const t_noc_opts& noc_opts_; From ce0fb18897385bf988e79b7f51a5388d5de13138 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 15:30:45 -0500 Subject: [PATCH 10/32] add print_initial_placement_stats() to PlacementLogPrinter --- vpr/src/draw/draw.cpp | 2 +- vpr/src/draw/draw.h | 2 +- vpr/src/place/place_log_util.cpp | 55 ++++++++++++++++++++++++++++++++ vpr/src/place/place_log_util.h | 1 + vpr/src/place/placer.cpp | 48 +--------------------------- 
vpr/src/place/placer.h | 2 -- 6 files changed, 59 insertions(+), 51 deletions(-) diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index 546bc8b55f3..46bdd750ca9 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -367,7 +367,7 @@ static void initial_setup_NO_PICTURE_to_ROUTING_with_crit_path( } #endif //NO_GRAPHICS -void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr setup_timing_info) { +void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr setup_timing_info) { #ifndef NO_GRAPHICS /* Updates the screen if the user has requested graphics. The priority * diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index 2bbd17d077f..355b2891931 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -42,7 +42,7 @@ extern ezgl::application application; #endif /* NO_GRAPHICS */ -void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr timing_info); +void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr timing_info); //FIXME: Currently broken if no rr-graph is loaded /** diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 4a5e90a2c42..5e375aa1f35 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -8,6 +8,8 @@ #include "VprTimingGraphResolver.h" #include "timing_info.h" #include "placer.h" +#include "draw.h" +#include "read_place.h" PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) : placer_(placer) {} @@ -136,6 +138,59 @@ void PlacementLogPrinter::print_placement_swaps_stats() const { VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, swap_stats.num_swap_aborted, 100 * abort_rate); } +void PlacementLogPrinter::print_initial_placement_stats() const { + const t_placer_costs& costs = placer_.costs(); + 
const t_noc_opts& noc_opts = placer_.noc_opts(); + const t_placer_opts& placer_opts = placer_.placer_opts(); + const tatum::TimingPathInfo& critical_path = placer_.critical_path(); + const std::optional& noc_cost_handler = placer_.noc_cost_handler(); + std::shared_ptr timing_info = placer_.timing_info(); + const PlacerState& placer_state = placer_.placer_state(); + + VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", + costs.cost, costs.bb_cost, costs.timing_cost); + + if (noc_opts.noc) { + VTR_ASSERT(noc_cost_handler.has_value()); + noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); + } + + if (placer_opts.place_algorithm.is_timing_driven()) { + VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", + 1e9 * critical_path.delay()); + VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", + 1e9 * timing_info->setup_total_negative_slack()); + VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", + 1e9 * timing_info->setup_worst_negative_slack()); + VTR_LOG("\n"); + VTR_LOG("Initial placement estimated setup slack histogram:\n"); + print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); + } + + const BlkLocRegistry& blk_loc_registry = placer_state.blk_loc_registry(); + size_t num_macro_members = 0; + for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) { + num_macro_members += macro.members.size(); + } + VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", + blk_loc_registry.place_macros().macros().size(), num_macro_members, + float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); + VTR_LOG("\n"); + + char msg[vtr::bufsize]; + sprintf(msg, + "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", + costs.cost, costs.bb_cost, costs.timing_cost, placer_opts.place_chan_width); + + // Draw the initial placement + update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); + + if (placer_opts.placement_saves_per_temperature >= 1) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); + VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); + } +} void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index f6bb64b8f9d..7edc7b5b190 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -24,6 +24,7 @@ class PlacementLogPrinter { void print_resources_utilization() const; void print_placement_swaps_stats() const; void print_place_status(float elapsed_sec) const; + void print_initial_placement_stats() const; private: const Placer& placer_; diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index ec78ce47fa8..f8f5e92f7af 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -126,7 +126,7 @@ Placer::Placer(const Netlist<>& net_list, // Sanity check that initial placement is legal check_place_(); - print_initial_placement_stats_(); + log_printer_.print_initial_placement_stats(); annealer_ = std::make_unique(placer_opts_, placer_state_, costs_, net_cost_handler_, noc_cost_handler_, noc_opts_, rng_, std::move(move_generator), std::move(move_generator2), place_delay_model_.get(), @@ -255,52 +255,6 @@ int Placer::check_placement_costs_() { return error; } -void Placer::print_initial_placement_stats_() { - VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", - costs_.cost, costs_.bb_cost, costs_.timing_cost); - - if (noc_opts_.noc) { - VTR_ASSERT(noc_cost_handler_.has_value()); - 
noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs_, noc_opts_); - } - - if (placer_opts_.place_algorithm.is_timing_driven()) { - VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", - 1e9 * critical_path_.delay()); - VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", - 1e9 * timing_info_->setup_total_negative_slack()); - VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", - 1e9 * timing_info_->setup_worst_negative_slack()); - VTR_LOG("\n"); - VTR_LOG("Initial placement estimated setup slack histogram:\n"); - print_histogram(create_setup_slack_histogram(*timing_info_->setup_analyzer())); - } - - const BlkLocRegistry& blk_loc_registry = placer_state_.blk_loc_registry(); - size_t num_macro_members = 0; - for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) { - num_macro_members += macro.members.size(); - } - VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", - blk_loc_registry.place_macros().macros().size(), num_macro_members, - float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); - VTR_LOG("\n"); - - char msg[vtr::bufsize]; - sprintf(msg, - "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", - costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width); - - // Draw the initial placement - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_); - - if (placer_opts_.placement_saves_per_temperature >= 1) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); - VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); - } -} - void Placer::place() { const auto& timing_ctx = g_vpr_ctx.timing(); const auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index ec0aaaeb44c..1fb706ace98 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -101,8 +101,6 @@ class Placer { int check_placement_costs_(); - void print_initial_placement_stats_(); - void print_post_placement_stats_(); }; From f2fab3846917735fa490283e6f1be3b11c5dd9e2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 15:39:41 -0500 Subject: [PATCH 11/32] add msg_ member variable to PlacementLogPrinter --- vpr/src/place/place_log_util.cpp | 15 +++++++++++---- vpr/src/place/place_log_util.h | 2 ++ vpr/src/place/placer.cpp | 5 ----- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 5e375aa1f35..c64e425a17a 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -12,7 +12,8 @@ #include "read_place.h" PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) - : placer_(placer) {} + : placer_(placer) + , msg_(vtr::bufsize) {} void PlacementLogPrinter::print_place_status_header() const { const bool noc_enabled = placer_.noc_opts().noc; @@ -44,6 +45,8 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const { const t_annealing_state& annealing_state = annealer.get_annealing_state(); 
const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); const int tot_moves = annealer.get_total_iteration(); + const t_placer_costs& costs = placer_.costs(); + std::shared_ptr timing_info = placer_.timing_info(); const bool noc_enabled = placer_.noc_opts().noc; const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms; @@ -77,6 +80,11 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const { VTR_LOG("\n"); fflush(stdout); + + sprintf(msg_.data(), "Cost: %g BB Cost %g TD Cost %g Temperature: %g", + costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); + + update_screen(ScreenUpdatePriority::MINOR, msg_.data(), PLACEMENT, timing_info); } void PlacementLogPrinter::print_resources_utilization() const { @@ -177,13 +185,12 @@ void PlacementLogPrinter::print_initial_placement_stats() const { float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); VTR_LOG("\n"); - char msg[vtr::bufsize]; - sprintf(msg, + sprintf(msg_.data(), "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", costs.cost, costs.bb_cost, costs.timing_cost, placer_opts.place_chan_width); // Draw the initial placement - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); + update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, timing_info); if (placer_opts.placement_saves_per_temperature >= 1) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index 7edc7b5b190..e33d5290953 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -3,6 +3,7 @@ #define VTR_PLACE_LOG_UTIL_H #include +#include #include "timing_info_fwd.h" #include "PlacementDelayCalculator.h" @@ -28,6 +29,7 @@ class PlacementLogPrinter { private: const Placer& placer_; + mutable std::vector msg_; }; void generate_post_place_timing_reports(const t_placer_opts& placer_opts, diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index f8f5e92f7af..4d1d1c1c4e9 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -297,11 +297,6 @@ void Placer::place() { log_printer_.print_place_status(temperature_timer.elapsed_sec()); -// sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", -// costs_.cost, costs_.bb_cost, costs_.timing_cost, annealing_state.t); -// -// update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info_); - //#ifdef VERBOSE // if (getEchoEnabled()) { // print_clb_placement("first_iteration_clb_placement.echo"); From efe43e96cd186dba5c4c377c1c0e3fe0dd239575 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 16:25:30 -0500 Subject: [PATCH 12/32] add print_post_placement_stats() to PlacementLogPrinter --- vpr/src/place/place_log_util.cpp | 56 ++++++++++++++++++++++ vpr/src/place/place_log_util.h | 1 + vpr/src/place/placer.cpp | 80 ++++++-------------------------- vpr/src/place/placer.h | 6 +-- vpr/src/timing/timing_util.cpp | 4 
+- vpr/src/timing/timing_util.h | 2 +- 6 files changed, 77 insertions(+), 72 deletions(-) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index c64e425a17a..c9c26cece20 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -10,6 +10,7 @@ #include "placer.h" #include "draw.h" #include "read_place.h" +#include "tatum/echo_writer.hpp" PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) : placer_(placer) @@ -199,6 +200,61 @@ void PlacementLogPrinter::print_initial_placement_stats() const { } } +void PlacementLogPrinter::print_post_placement_stats() const { + const auto& timing_ctx = g_vpr_ctx.timing(); + const PlacementAnnealer& annealer = placer_.annealer(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + + VTR_LOG("\n"); + VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); + // blocks_affected.move_abortion_logger.report_aborted_moves(); + + if (placer_.placer_opts_.place_algorithm.is_timing_driven()) { + //Final timing estimate + VTR_ASSERT(placer_.timing_info_); + + if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { + tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placer_.placement_delay_calc_, placer_.timing_info_->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(placer_.analysis_opts_.echo_dot_timing_graph_node); + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *placer_.timing_info_, debug_tnode); + } + + generate_post_place_timing_reports(placer_.placer_opts_, placer_.analysis_opts_, *placer_.timing_info_, + *placer_.placement_delay_calc_, /*is_flat=*/false, placer_.placer_state_.blk_loc_registry()); + + // Print critical path delay metrics + VTR_LOG("\n"); + print_setup_timing_summary(*timing_ctx.constraints, + *placer_.timing_info_->setup_analyzer(), "Placement estimated ", ""); + 
} + + char msg[vtr::bufsize]; + sprintf(msg, + "Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", + placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost, placer_.placer_opts_.place_chan_width); + VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", placer_.costs_.cost, + placer_.costs_.bb_cost, placer_.costs_.timing_cost); + update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, placer_.timing_info_); + + // print the noc costs info + if (placer_.noc_opts_.noc) { + VTR_ASSERT(placer_.noc_cost_handler_.has_value()); + placer_.noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", placer_.costs_, placer_.noc_opts_); + + // TODO: move this to an appropriate file +#ifdef ENABLE_NOC_SAT_ROUTING + if (costs.noc_cost_terms.congestion > 0.0) { + VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); + invoke_sat_router(costs, noc_opts, placer_opts.seed); + } +#endif //ENABLE_NOC_SAT_ROUTING + } +} + void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index e33d5290953..41511ae1dd4 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -26,6 +26,7 @@ class PlacementLogPrinter { void print_placement_swaps_stats() const; void print_place_status(float elapsed_sec) const; void print_initial_placement_stats() const; + void print_post_placement_stats() const; private: const Placer& placer_; diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 4d1d1c1c4e9..ec2cc7a9551 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -9,12 +9,12 @@ #include "analytic_placer.h" #include "initial_placement.h" #include "concrete_timing_info.h" -#include "tatum/echo_writer.hpp" #include "verify_placement.h" #include "place_timing_update.h" #include "annealer.h" #include "RL_agent_util.h" 
#include "place_checkpoint.h" +#include "tatum/echo_writer.hpp" Placer::Placer(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -179,17 +179,14 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, // Write out the initial timing echo file if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo( - getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc_, timing_info_->analyzer()); + tatum::write_echo(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placement_delay_calc_, timing_info_->analyzer()); tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); - write_setup_timing_graph_dot( - getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) - + std::string(".dot"), - *timing_info_, debug_tnode); + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *timing_info_, debug_tnode); } costs_.timing_cost_norm = 1 / costs_.timing_cost; @@ -337,8 +334,11 @@ void Placer::place() { perform_full_timing_update(crit_params, place_delay_model_.get(), placer_criticalities_.get(), placer_setup_slacks_.get(), pin_timing_invalidator_.get(), timing_info_.get(), &costs_, placer_state_); + + critical_path_ = timing_info_->least_slack_critical_path(); + VTR_LOG("post-quench CPD = %g (ns) \n", - 1e9 * timing_info_->least_slack_critical_path().delay()); + 1e9 * critical_path_.delay()); } // See if our latest checkpoint is better than the current placement solution @@ -369,7 +369,7 @@ void Placer::place() { check_place_(); - print_post_placement_stats_(); + log_printer_.print_post_placement_stats(); // Print out swap statistics and resource utilization log_printer_.print_resources_utilization(); @@ -391,61 +391,6 @@ void Placer::place() { p_runtime_ctx.f_update_td_costs_total_elapsed_sec); } -void 
Placer::print_post_placement_stats_() { - const auto& timing_ctx = g_vpr_ctx.timing(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats(); - - VTR_LOG("\n"); - VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); -// blocks_affected.move_abortion_logger.report_aborted_moves(); - - if (placer_opts_.place_algorithm.is_timing_driven()) { - //Final timing estimate - VTR_ASSERT(timing_info_); - - critical_path_ = timing_info_->least_slack_critical_path(); - - if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc_, timing_info_->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts_.echo_dot_timing_graph_node); - write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), - *timing_info_, debug_tnode); - } - - generate_post_place_timing_reports(placer_opts_, analysis_opts_, *timing_info_, - *placement_delay_calc_, /*is_flat=*/false, placer_state_.blk_loc_registry()); - - // Print critical path delay metrics - VTR_LOG("\n"); - print_setup_timing_summary(*timing_ctx.constraints, - *timing_info_->setup_analyzer(), "Placement estimated ", ""); - } - - char msg[vtr::bufsize]; - sprintf(msg, - "Placement. 
Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", - costs_.cost, costs_.bb_cost, costs_.timing_cost, placer_opts_.place_chan_width); - VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs_.cost, - costs_.bb_cost, costs_.timing_cost); - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info_); - - // print the noc costs info - if (noc_opts_.noc) { - VTR_ASSERT(noc_cost_handler_.has_value()); - noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", costs_, noc_opts_); - -#ifdef ENABLE_NOC_SAT_ROUTING - if (costs.noc_cost_terms.congestion > 0.0) { - VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); - invoke_sat_router(costs, noc_opts, placer_opts.seed); - } -#endif //ENABLE_NOC_SAT_ROUTING - } -} - void Placer::copy_locs_to_global_state() { auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -481,12 +426,15 @@ const t_placer_costs& Placer::costs() const { const tatum::TimingPathInfo& Placer::critical_path() const { return critical_path_; } + std::shared_ptr Placer::timing_info() const { return timing_info_; } + const PlacerState& Placer::placer_state() const { return placer_state_; } + const std::optional& Placer::noc_cost_handler() const { return noc_cost_handler_; } diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 1fb706ace98..ea6643bb7f5 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -84,7 +84,9 @@ class Placer { std::unique_ptr annealer_; - PlacementLogPrinter log_printer_; + const PlacementLogPrinter log_printer_; + + friend void PlacementLogPrinter::print_post_placement_stats() const; private: void alloc_and_init_timing_objects_(const Netlist<>& net_list, @@ -100,8 +102,6 @@ class Placer { void check_place_(); int check_placement_costs_(); - - void print_post_placement_stats_(); }; #endif //VTR_PLACER_H diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index a210c0dbdcd..a0de3038fde 100644 --- a/vpr/src/timing/timing_util.cpp 
+++ b/vpr/src/timing/timing_util.cpp @@ -47,7 +47,7 @@ tatum::TimingPathInfo find_least_slack_critical_path_delay(const tatum::TimingCo auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); - //Record the maximum critical path accross all domain pairs + //Record the maximum critical path across all domain pairs for (const auto& path_info : cpds) { if (path_info.slack() < crit_path_info.slack() || std::isnan(crit_path_info.slack())) { crit_path_info = path_info; @@ -855,7 +855,7 @@ tatum::NodeId pin_name_to_tnode(std::string pin_name) { return tnode; } -void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node) { +void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) { auto& timing_graph = *timing_info.timing_graph(); auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator()); diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index e4d45c84213..0a04a132f44 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -119,7 +119,7 @@ void print_tatum_cpds(std::vector cpds); tatum::NodeId id_or_pin_name_to_tnode(std::string name_or_id); tatum::NodeId pin_name_to_tnode(std::string name); -void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); +void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); struct TimingStats { From 8b7780ea4d82fef6603646bb090fd0a05bccf161 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 16:32:46 -0500 Subject: [PATCH 13/32] call get_move_abortion_logger() in print_post_placement_stats() --- 
vpr/src/place/annealer.cpp | 6 +++++- vpr/src/place/annealer.h | 7 +++++++ vpr/src/place/place_log_util.cpp | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 56f419477e2..1beb0e336c7 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -778,6 +778,10 @@ std::tuple return {swap_stats_, move_type_stats_, placer_stats_}; } +const MoveAbortionLogger& PlacementAnnealer::get_move_abortion_logger() const { + return blocks_affected_.move_abortion_logger; +} + void PlacementAnnealer::LOG_MOVE_STATS_HEADER() { if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { if (move_stats_file_) { @@ -857,4 +861,4 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) { } VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); return e_move_result::REJECTED; -} \ No newline at end of file +} diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 039ecfb652f..d8e9939cb27 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -210,6 +210,13 @@ class PlacementAnnealer { /// @brief Returns constant references to different statistics objects std::tuple get_stats() const; + /** + * @brief Returns MoveAbortionLogger to report how many moves + * were aborted for each reason. + * @return A constant reference to a MoveAbortionLogger object. + */ + const MoveAbortionLogger& get_move_abortion_logger() const; + private: /** * @brief Determines whether a move should be accepted or not. 
diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index c9c26cece20..40598a67a0f 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -207,7 +207,7 @@ void PlacementLogPrinter::print_post_placement_stats() const { VTR_LOG("\n"); VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - // blocks_affected.move_abortion_logger.report_aborted_moves(); + annealer.get_move_abortion_logger().report_aborted_moves(); if (placer_.placer_opts_.place_algorithm.is_timing_driven()) { //Final timing estimate From a2da0eeac6b4978b5adee8b8cad61293fbbb5c5f Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 16:33:59 -0500 Subject: [PATCH 14/32] use msg member varible instead of msg local variable --- vpr/src/place/place_log_util.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 40598a67a0f..0f2b88ccfcf 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -232,13 +232,12 @@ void PlacementLogPrinter::print_post_placement_stats() const { *placer_.timing_info_->setup_analyzer(), "Placement estimated ", ""); } - char msg[vtr::bufsize]; - sprintf(msg, + sprintf(msg_.data(), "Placement. 
Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost, placer_.placer_opts_.place_chan_width); VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost); - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, placer_.timing_info_); + update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, placer_.timing_info_); // print the noc costs info if (placer_.noc_opts_.noc) { From a632425b98f8030cee736e94f8fea5303ead0b7c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 16:38:07 -0500 Subject: [PATCH 15/32] add quiet mode to PlacementLogPrinter --- vpr/src/place/place_log_util.cpp | 29 +++++++++++++++++++++++++++-- vpr/src/place/place_log_util.h | 3 ++- vpr/src/place/placer.cpp | 2 +- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 0f2b88ccfcf..37e16fd4cd6 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -12,11 +12,16 @@ #include "read_place.h" #include "tatum/echo_writer.hpp" -PlacementLogPrinter::PlacementLogPrinter(const Placer& placer) +PlacementLogPrinter::PlacementLogPrinter(const Placer& placer, bool quiet) : placer_(placer) - , msg_(vtr::bufsize) {} + , quiet_(quiet) + , msg_(quiet ? 
0 : vtr::bufsize) {} void PlacementLogPrinter::print_place_status_header() const { + if (quiet_) { + return; + } + const bool noc_enabled = placer_.noc_opts().noc; VTR_LOG("\n"); @@ -42,6 +47,10 @@ void PlacementLogPrinter::print_place_status_header() const { } void PlacementLogPrinter::print_place_status(float elapsed_sec) const { + if (quiet_) { + return; + } + const PlacementAnnealer& annealer = placer_.annealer(); const t_annealing_state& annealing_state = annealer.get_annealing_state(); const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); @@ -89,6 +98,10 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const { } void PlacementLogPrinter::print_resources_utilization() const { + if (quiet_) { + return; + } + const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); const auto& block_locs = placer_.placer_state().block_locs(); @@ -126,6 +139,10 @@ void PlacementLogPrinter::print_resources_utilization() const { } void PlacementLogPrinter::print_placement_swaps_stats() const { + if (quiet_) { + return; + } + const PlacementAnnealer& annealer = placer_.annealer(); const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); const t_annealing_state& annealing_state = annealer.get_annealing_state(); @@ -148,6 +165,10 @@ void PlacementLogPrinter::print_placement_swaps_stats() const { swap_stats.num_swap_aborted, 100 * abort_rate); } void PlacementLogPrinter::print_initial_placement_stats() const { + if (quiet_) { + return; + } + const t_placer_costs& costs = placer_.costs(); const t_noc_opts& noc_opts = placer_.noc_opts(); const t_placer_opts& placer_opts = placer_.placer_opts(); @@ -201,6 +222,10 @@ void PlacementLogPrinter::print_initial_placement_stats() const { } void PlacementLogPrinter::print_post_placement_stats() const { + if (quiet_) { + return; + } + const auto& timing_ctx = g_vpr_ctx.timing(); const PlacementAnnealer& annealer = placer_.annealer(); const 
auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index 41511ae1dd4..d01d3f4b6dc 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -19,7 +19,7 @@ class Placer; class PlacementLogPrinter { public: - explicit PlacementLogPrinter(const Placer& placer); + explicit PlacementLogPrinter(const Placer& placer, bool quiet); void print_place_status_header() const; void print_resources_utilization() const; @@ -30,6 +30,7 @@ class PlacementLogPrinter { private: const Placer& placer_; + const bool quiet_; mutable std::vector msg_; }; diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index ec2cc7a9551..b0a1bc45652 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -31,7 +31,7 @@ Placer::Placer(const Netlist<>& net_list, , rng_(placer_opts.seed) , net_cost_handler_(placer_opts, placer_state_, cube_bb) , place_delay_model_(std::move(place_delay_model)) - , log_printer_(*this) { + , log_printer_(*this, /*quiet*/false) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); From 291ec6fb59a024c1b4e8a17b6914bc1b60cc74b2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 18 Nov 2024 16:46:41 -0500 Subject: [PATCH 16/32] record timing_stats in Placer class --- vpr/src/place/place.cpp | 2 -- vpr/src/place/place_log_util.cpp | 22 ++++++++++++++++++++++ vpr/src/place/placer.cpp | 32 +++++++------------------------- vpr/src/place/placer.h | 4 ++++ 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index ba86e91020b..fb58339355b 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -61,8 +61,6 @@ void try_place(const Netlist<>& net_list, */ VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); -// const auto& timing_ctx = g_vpr_ctx.timing(); -// 
auto pre_place_timing_stats = timing_ctx.stats; /* Placement delay model is independent of the placement and can be shared across * multiple placers. So, it is created and initialized once. */ diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 37e16fd4cd6..29ba2b917a7 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -1,3 +1,4 @@ + #include "place_log_util.h" #include "vtr_log.h" @@ -277,6 +278,27 @@ void PlacementLogPrinter::print_post_placement_stats() const { } #endif //ENABLE_NOC_SAT_ROUTING } + + // Print out swap statistics and resource utilization + print_resources_utilization(); + print_placement_swaps_stats(); + + move_type_stats.print_placement_move_types_stats(); + + if (placer_.noc_opts_.noc) { + write_noc_placement_file(placer_.noc_opts_.noc_placement_file_name, + placer_.placer_state_.block_locs()); + } + + print_timing_stats("Placement Quench", placer_.post_quench_timing_stats_, placer_.pre_quench_timing_stats_); + print_timing_stats("Placement Total ", timing_ctx.stats, placer_.pre_place_timing_stats_); + + const auto& p_runtime_ctx = placer_.placer_state_.runtime(); + VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", + p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, + p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_total_elapsed_sec); } void generate_post_place_timing_reports(const t_placer_opts& placer_opts, diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index b0a1bc45652..d2386e86128 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -36,6 +36,9 @@ Placer::Placer(const Netlist<>& net_list, const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& timing_ctx = g_vpr_ctx.timing(); + pre_place_timing_stats_ = timing_ctx.stats; + 
init_placement_context(placer_state_.mutable_blk_loc_registry(), directs); // create a NoC cost handler if NoC optimization is enabled @@ -255,7 +258,7 @@ int Placer::check_placement_costs_() { void Placer::place() { const auto& timing_ctx = g_vpr_ctx.timing(); const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& p_runtime_ctx = placer_state_.runtime(); + bool skip_anneal = false; #ifdef ENABLE_ANALYTIC_PLACE @@ -265,9 +268,6 @@ void Placer::place() { } #endif - const t_annealing_state& annealing_state = annealer_->get_annealing_state(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer_->get_stats(); - if (!skip_anneal) { //Table header log_printer_.print_place_status_header(); @@ -307,7 +307,7 @@ void Placer::place() { // Start Quench annealer_->start_quench(); - auto pre_quench_timing_stats = timing_ctx.stats; + pre_quench_timing_stats_ = timing_ctx.stats; { // Quench vtr::ScopedFinishTimer temperature_timer("Placement Quench"); @@ -323,9 +323,10 @@ void Placer::place() { log_printer_.print_place_status(temperature_timer.elapsed_sec()); } - auto post_quench_timing_stats = timing_ctx.stats; + post_quench_timing_stats_ = timing_ctx.stats; // Final timing analysis + const t_annealing_state& annealing_state = annealer_->get_annealing_state(); PlaceCritParams crit_params; crit_params.crit_exponent = annealing_state.crit_exponent; crit_params.crit_limit = placer_opts_.place_crit_limit; @@ -370,25 +371,6 @@ void Placer::place() { check_place_(); log_printer_.print_post_placement_stats(); - - // Print out swap statistics and resource utilization - log_printer_.print_resources_utilization(); - log_printer_.print_placement_swaps_stats(); - - move_type_stats.print_placement_move_types_stats(); - - if (noc_opts_.noc) { - write_noc_placement_file(noc_opts_.noc_placement_file_name, placer_state_.block_locs()); - } - - print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); -// print_timing_stats("Placement Total 
", timing_ctx.stats, pre_place_timing_stats); - - VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", - p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, - p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_total_elapsed_sec); } void Placer::copy_locs_to_global_state() { diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index ea6643bb7f5..29d4b4bdd2d 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -86,6 +86,10 @@ class Placer { const PlacementLogPrinter log_printer_; + t_timing_analysis_profile_info pre_place_timing_stats_; + t_timing_analysis_profile_info pre_quench_timing_stats_; + t_timing_analysis_profile_info post_quench_timing_stats_; + friend void PlacementLogPrinter::print_post_placement_stats() const; private: From d923d7518d3fd863a38e7a8c3f20db9e09261f4a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 20 Nov 2024 11:59:09 -0500 Subject: [PATCH 17/32] add is_flat to Placer --- vpr/src/place/place.cpp | 2 +- vpr/src/place/placer.cpp | 12 +++++++----- vpr/src/place/placer.h | 10 ++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index e687f3d41b3..f0d0ab034c6 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -93,7 +93,7 @@ void try_place(const Netlist<>& net_list, place_ctx.lock_loc_vars(); place_ctx.compressed_block_grids = create_compressed_block_grids(); - Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb); + Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); #ifndef NO_GRAPHICS if (placer.noc_cost_handler().has_value()) { diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index d2386e86128..e2a1af629de 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ 
-22,7 +22,9 @@ Placer::Placer(const Netlist<>& net_list, const t_noc_opts& noc_opts, const std::vector& directs, std::shared_ptr place_delay_model, - bool cube_bb) + bool cube_bb, + bool is_flat, + bool quiet) : placer_opts_(placer_opts) , analysis_opts_(analysis_opts) , noc_opts_(noc_opts) @@ -31,7 +33,8 @@ Placer::Placer(const Netlist<>& net_list, , rng_(placer_opts.seed) , net_cost_handler_(placer_opts, placer_state_, cube_bb) , place_delay_model_(std::move(place_delay_model)) - , log_printer_(*this, /*quiet*/false) { + , log_printer_(*this, quiet) + , is_flat_(is_flat) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); @@ -61,7 +64,6 @@ Placer::Placer(const Netlist<>& net_list, normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } - BlkLocRegistry& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); initial_placement(placer_opts, placer_opts.constraints_file.c_str(), noc_opts, blk_loc_registry, noc_cost_handler_, rng_); @@ -151,7 +153,7 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, placement_delay_calc_ = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, p_timing_ctx.connection_delay, - /*is_flat=*/false); + is_flat_); placement_delay_calc_->set_tsu_margin_relative(placer_opts_.tsu_rel_margin); placement_delay_calc_->set_tsu_margin_absolute(placer_opts_.tsu_abs_margin); @@ -167,7 +169,7 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, atom_ctx.nlist, atom_ctx.lookup, *timing_info_->timing_graph(), - /*is_flat=*/false); + is_flat_); // First time compute timing and costs, compute from scratch PlaceCritParams crit_params; diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 29d4b4bdd2d..d17f61b0650 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -27,7 +27,9 @@ class Placer { const t_noc_opts& noc_opts, const std::vector& directs, std::shared_ptr place_delay_model, - bool 
cube_bb); + bool cube_bb, + bool is_flat, + bool quiet); void place(); @@ -55,7 +57,6 @@ class Placer { const std::optional& noc_cost_handler() const; - //TODO: make this private private: const t_placer_opts& placer_opts_; const t_analysis_opts& analysis_opts_; @@ -66,6 +67,8 @@ class Placer { NetCostHandler net_cost_handler_; std::optional noc_cost_handler_; std::shared_ptr place_delay_model_; + const PlacementLogPrinter log_printer_; + const bool is_flat_; t_placement_checkpoint placement_checkpoint_; @@ -76,7 +79,6 @@ class Placer { std::unique_ptr pin_timing_invalidator_; tatum::TimingPathInfo critical_path_; - std::unique_ptr timer_; IntraLbPbPinLookup pb_gpin_lookup_; @@ -84,7 +86,7 @@ class Placer { std::unique_ptr annealer_; - const PlacementLogPrinter log_printer_; + t_timing_analysis_profile_info pre_place_timing_stats_; t_timing_analysis_profile_info pre_quench_timing_stats_; From c61625464ebe6b68a6aa348f818ff5a1bb8a16f8 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 20 Nov 2024 13:05:27 -0500 Subject: [PATCH 18/32] fix failure in vtr_reg_strong/strong_graphics_commands --- vpr/src/base/read_options.cpp | 2 +- vpr/src/place/place.cpp | 9 --------- vpr/src/place/placer.cpp | 11 +++++++++++ vpr/src/place/placer.h | 2 -- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index f789f848808..78124dd85c3 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1377,7 +1377,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio " * set_nets \n" " Sets the net drawing state\n" " * set_cpd \n" - " Sets the criticla path delay drawing state\n" + " Sets the critical path delay drawing state\n" " * set_routing_util \n" " Sets the routing utilization drawing state\n" " * set_clip_routing_util \n" diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f0d0ab034c6..496a2a1dfde 100644 --- a/vpr/src/place/place.cpp 
+++ b/vpr/src/place/place.cpp @@ -95,15 +95,6 @@ void try_place(const Netlist<>& net_list, Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); -#ifndef NO_GRAPHICS - if (placer.noc_cost_handler().has_value()) { - get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(placer.noc_cost_handler()->get_link_bandwidth_usages()); - } -#endif - - const int width_fac = placer_opts.place_chan_width; - init_draw_coords((float)width_fac, placer.placer_state().blk_loc_registry()); - placer.place(); free_placement_structs(); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index e2a1af629de..2de33c88791 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -94,6 +94,17 @@ Placer::Placer(const Netlist<>& net_list, blk_loc_registry.place_sync_external_block_connections(block_id); } + if (!quiet) { +#ifndef NO_GRAPHICS + if (noc_cost_handler_.has_value()) { + get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler_->get_link_bandwidth_usages()); + } +#endif + + const int width_fac = placer_opts.place_chan_width; + init_draw_coords((float)width_fac, placer_state_.blk_loc_registry()); + } + // Allocate here because it goes into timing critical code where each memory allocation is expensive pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types); // Enables fast look-up of atom pins connect to CLB pins diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index d17f61b0650..f65ed38b56e 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -86,8 +86,6 @@ class Placer { std::unique_ptr annealer_; - - t_timing_analysis_profile_info pre_place_timing_stats_; t_timing_analysis_profile_info pre_quench_timing_stats_; t_timing_analysis_profile_info post_quench_timing_stats_; From 1dbb81ba0cec65d9e0fb3ed23a39720e4ae45f65 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 20 Nov 2024 13:32:35 -0500 Subject: [PATCH 19/32] add some 
comments to Placer --- vpr/src/place/placer.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index f65ed38b56e..e9be736d4c7 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -58,18 +58,30 @@ class Placer { const std::optional& noc_cost_handler() const; private: + /// Holds placement algorithm parameters const t_placer_opts& placer_opts_; + /// Holds timing analysis parameters const t_analysis_opts& analysis_opts_; + /// Holds NoC-related parameters const t_noc_opts& noc_opts_; + /// Placement cost terms with their normalization factors and total cost t_placer_costs costs_; + /// Holds timing, runtime, and block location information PlacerState placer_state_; + /// Random number generator used to select random blocks and locations vtr::RngContainer rng_; + /// Computes and updates net bounding box cost NetCostHandler net_cost_handler_; + /// Compute and updates NoC-related cost terms if NoC optimization is enabled std::optional noc_cost_handler_; + /// A delay model shared between multiple instances of this class. std::shared_ptr place_delay_model_; + /// Prints logs during placement const PlacementLogPrinter log_printer_; + /// Indicates if flat routing resource graph and delay model is used. It should be false. const bool is_flat_; + /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later. t_placement_checkpoint placement_checkpoint_; std::shared_ptr timing_info_; @@ -105,6 +117,12 @@ class Placer { */ void check_place_(); + /** + * Computes bounding box and timing cost to ensure it is + * within a small error margin what we thing the cost is. + * @return Number cost elements, i.e. BB and timing, that falls + * outside the acceptable round-off error margin. 
+ */ int check_placement_costs_(); }; From 427f9f268eb52dc3476918a5852cd747a05c2763 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 20 Nov 2024 17:33:55 -0500 Subject: [PATCH 20/32] string_view and reference in timing_util --- vpr/src/place/place.cpp | 1 - vpr/src/timing/timing_util.cpp | 75 ++++++++++++++++++---------------- vpr/src/timing/timing_util.h | 22 ++++++---- 3 files changed, 53 insertions(+), 45 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 496a2a1dfde..c0257d939c0 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -9,7 +9,6 @@ #include "globals.h" #include "place.h" #include "annealer.h" -#include "draw.h" #include "read_xml_arch_file.h" #include "echo_files.h" #include "histogram.h" diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index a0de3038fde..e51fadf5d42 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "vtr_log.h" #include "vtr_assert.h" @@ -30,7 +31,7 @@ tatum::TimingPathInfo find_longest_critical_path_delay(const tatum::TimingConstr auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); - //Record the maximum critical path accross all domain pairs + //Record the maximum critical path across all domain pairs for (const auto& path_info : cpds) { if (crit_path_info.delay() < path_info.delay() || std::isnan(crit_path_info.delay())) { crit_path_info = path_info; @@ -234,7 +235,7 @@ TimingStats::TimingStats(std::string pref, double cpd, double f_max, double swns fmax = f_max; setup_worst_neg_slack = swns; setup_total_neg_slack = stns; - prefix = pref; + prefix = std::move(pref); } void TimingStats::write(OutputFormat fmt, std::ostream& output) const { @@ -255,23 +256,23 @@ void TimingStats::write(OutputFormat fmt, std::ostream& output) const { } } -void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats) 
{ - if (timing_summary_filename.size() > 0) { +void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats) { + if (!timing_summary_filename.empty()) { TimingStats::OutputFormat fmt; - if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".json")) { + if (vtr::check_file_name_extension(timing_summary_filename.data(), ".json")) { fmt = TimingStats::OutputFormat::JSON; - } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".xml")) { + } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".xml")) { fmt = TimingStats::OutputFormat::XML; - } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".txt")) { + } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".txt")) { fmt = TimingStats::OutputFormat::HumanReadable; } else { - VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.c_str()); + VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.data()); } std::fstream fp; - fp.open(timing_summary_filename, std::fstream::out | std::fstream::trunc); + fp.open(timing_summary_filename.data(), std::fstream::out | std::fstream::trunc); stats.write(fmt, fp); fp.close(); } @@ -279,8 +280,8 @@ void write_setup_timing_summary(std::string timing_summary_filename, const Timin void print_setup_timing_summary(const tatum::TimingConstraints& constraints, const tatum::SetupTimingAnalyzer& setup_analyzer, - std::string prefix, - std::string timing_summary_filename) { + std::string_view prefix, + std::string_view timing_summary_filename) { auto& timing_ctx = g_vpr_ctx.timing(); auto crit_paths = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); @@ -292,12 +293,12 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, double setup_worst_neg_slack = sec_to_nanosec(find_setup_worst_negative_slack(setup_analyzer)); double 
setup_total_neg_slack = sec_to_nanosec(find_setup_total_negative_slack(setup_analyzer)); - const auto stats = TimingStats(prefix, least_slack_cpd_delay, fmax, + const auto stats = TimingStats(prefix.data(), least_slack_cpd_delay, fmax, setup_worst_neg_slack, setup_total_neg_slack); if (!timing_summary_filename.empty()) write_setup_timing_summary(timing_summary_filename, stats); - VTR_LOG("%scritical path delay (least slack): %g ns", prefix.c_str(), least_slack_cpd_delay); + VTR_LOG("%scritical path delay (least slack): %g ns", prefix.data(), least_slack_cpd_delay); if (crit_paths.size() == 1) { //Fmax is only meaningful for a single-clock circuit @@ -305,11 +306,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", prefix.c_str(), setup_worst_neg_slack); - VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.c_str(), setup_total_neg_slack); + VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", prefix.data(), setup_worst_neg_slack); + VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.data(), setup_total_neg_slack); VTR_LOG("\n"); - VTR_LOG("%ssetup slack histogram:\n", prefix.c_str()); + VTR_LOG("%ssetup slack histogram:\n", prefix.data()); print_histogram(create_setup_slack_histogram(setup_analyzer)); if (crit_paths.size() > 1) { @@ -317,7 +318,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, VTR_LOG("\n"); //Periods per constraint - VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.c_str()); + VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() == path.capture_domain()) { VTR_LOG(" %s to %s CPD: %g ns (%g MHz)\n", @@ -329,7 +330,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.c_str()); + 
VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() != path.capture_domain()) { VTR_LOG(" %s to %s CPD: %g ns (%g MHz)\n", @@ -342,7 +343,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, VTR_LOG("\n"); //Slack per constraint - VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() == path.capture_domain()) { VTR_LOG(" %s to %s worst setup slack: %g ns\n", @@ -353,7 +354,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() != path.capture_domain()) { VTR_LOG(" %s to %s worst setup slack: %g ns\n", @@ -374,7 +375,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, if (path.launch_domain() == path.capture_domain() && !constraints.is_virtual_clock(path.launch_domain())) { if (path.delay() == 0.) 
{ VTR_LOG_WARN("%s%s to %s CPD is %g, skipping in geomean and fanout-weighted CPDs\n", - prefix.c_str(), + prefix.data(), constraints.clock_domain_name(path.launch_domain()).c_str(), constraints.clock_domain_name(path.capture_domain()).c_str(), sec_to_nanosec(path.delay())); @@ -394,11 +395,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, //Print multi-clock geomeans double geomean_intra_domain_cpd = std::numeric_limits::quiet_NaN(); - if (intra_domain_cpds.size() > 0) { + if (!intra_domain_cpds.empty()) { geomean_intra_domain_cpd = vtr::geomean(intra_domain_cpds.begin(), intra_domain_cpds.end()); } VTR_LOG("%sgeomean non-virtual intra-domain period: %g ns (%g MHz)\n", - prefix.c_str(), + prefix.data(), sec_to_nanosec(geomean_intra_domain_cpd), sec_to_mhz(geomean_intra_domain_cpd)); @@ -408,13 +409,13 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } double fanout_weighted_geomean_intra_domain_cpd = std::numeric_limits::quiet_NaN(); - if (fanout_weighted_intra_domain_cpds.size() > 0) { + if (!fanout_weighted_intra_domain_cpds.empty()) { fanout_weighted_geomean_intra_domain_cpd = vtr::geomean(fanout_weighted_intra_domain_cpds.begin(), fanout_weighted_intra_domain_cpds.end()); } VTR_LOG("%sfanout-weighted geomean non-virtual intra-domain period: %g ns (%g MHz)\n", - prefix.c_str(), + prefix.data(), sec_to_nanosec(fanout_weighted_geomean_intra_domain_cpd), sec_to_mhz(fanout_weighted_geomean_intra_domain_cpd)); @@ -605,20 +606,22 @@ std::vector create_hold_slack_histogram(const tatum::HoldTiming return histogram; } -void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix) { +void print_hold_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::HoldTimingAnalyzer& hold_analyzer, + std::string_view prefix) { auto& timing_ctx = g_vpr_ctx.timing(); auto hold_worst_neg_slack = 
sec_to_nanosec(find_hold_worst_negative_slack(hold_analyzer)); auto hold_total_neg_slack = sec_to_nanosec(find_hold_total_negative_slack(hold_analyzer)); - VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.c_str(), hold_worst_neg_slack); - VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.c_str(), hold_total_neg_slack); + VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.data(), hold_worst_neg_slack); + VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.data(), hold_total_neg_slack); /*For testing*/ //VTR_LOG("Hold Total Negative Slack within clbs: %g ns\n", sec_to_nanosec(find_total_negative_slack_within_clb_blocks(hold_analyzer))); VTR_LOG("\n"); - VTR_LOG("%shold slack histogram:\n", prefix.c_str()); + VTR_LOG("%shold slack histogram:\n", prefix.data()); print_histogram(create_hold_slack_histogram(hold_analyzer)); if (constraints.clock_domains().size() > 1) { @@ -626,7 +629,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons VTR_LOG("\n"); //Slack per constraint - VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.data()); for (const auto& domain : constraints.clock_domains()) { float worst_slack = find_hold_worst_slack(hold_analyzer, domain, domain); @@ -639,7 +642,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons } VTR_LOG("\n"); - VTR_LOG("%sinter-domain worst hold slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sinter-domain worst hold slacks per constraint:\n", prefix.data()); for (const auto& launch_domain : constraints.clock_domains()) { for (const auto& capture_domain : constraints.clock_domains()) { if (launch_domain != capture_domain) { @@ -816,13 +819,13 @@ float calc_relaxed_criticality(const std::map& domains_max_re return max_crit; } -void print_tatum_cpds(std::vector cpds) { +void print_tatum_cpds(const std::vector& cpds) { for 
(auto path : cpds) { VTR_LOG("Tatum %zu -> %zu: least_slack=%g cpd=%g\n", size_t(path.launch_domain()), size_t(path.capture_domain()), float(path.slack()), float(path.delay())); } } -tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) { +tatum::NodeId id_or_pin_name_to_tnode(const std::string& pin_name_or_tnode) { std::istringstream ss(pin_name_or_tnode); int id; if (ss >> id) { //Successfully converted @@ -837,7 +840,7 @@ tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) { return pin_name_to_tnode(pin_name_or_tnode); } -tatum::NodeId pin_name_to_tnode(std::string pin_name) { +tatum::NodeId pin_name_to_tnode(const std::string& pin_name) { auto& atom_ctx = g_vpr_ctx.atom(); AtomPinId pin = atom_ctx.nlist.find_pin(pin_name); @@ -855,7 +858,7 @@ tatum::NodeId pin_name_to_tnode(std::string pin_name) { return tnode; } -void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) { +void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) { auto& timing_graph = *timing_info.timing_graph(); auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator()); @@ -874,7 +877,7 @@ void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& t dot_writer.write_dot_file(filename, *timing_info.setup_analyzer()); } -void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) { +void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) { auto& timing_graph = *timing_info.timing_graph(); auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator()); diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index 0a04a132f44..e0d011214ba 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -1,6 
+1,7 @@ #ifndef VPR_TIMING_UTIL_H #define VPR_TIMING_UTIL_H #include +#include #include "netlist_fwd.h" #include "tatum/timing_analyzers.hpp" @@ -49,7 +50,10 @@ std::vector create_criticality_histogram(const Netlist<>& net_l size_t num_bins = 10); //Print a useful summary of timing information -void print_setup_timing_summary(const tatum::TimingConstraints& constraints, const tatum::SetupTimingAnalyzer& setup_analyzer, std::string prefix, std::string timing_summary_filename); +void print_setup_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::SetupTimingAnalyzer& setup_analyzer, + std::string_view prefix, + std::string_view timing_summary_filename); /* * Hold-time related statistics @@ -67,7 +71,9 @@ float find_hold_worst_slack(const tatum::HoldTimingAnalyzer& hold_analyzer, cons std::vector create_hold_slack_histogram(const tatum::HoldTimingAnalyzer& hold_analyzer, size_t num_bins = 10); //Print a useful summary of timing information -void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix); +void print_hold_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::HoldTimingAnalyzer& hold_analyzer, + std::string_view prefix); float find_total_negative_slack_within_clb_blocks(const tatum::HoldTimingAnalyzer& hold_analyzer); @@ -114,13 +120,13 @@ float calc_relaxed_criticality(const std::map& domains_max_re /* * Debug */ -void print_tatum_cpds(std::vector cpds); +void print_tatum_cpds(const std::vector& cpds); -tatum::NodeId id_or_pin_name_to_tnode(std::string name_or_id); -tatum::NodeId pin_name_to_tnode(std::string name); +tatum::NodeId id_or_pin_name_to_tnode(const std::string& name_or_id); +tatum::NodeId pin_name_to_tnode(const std::string& name); -void write_setup_timing_graph_dot(std::string filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); -void 
write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); +void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); +void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); struct TimingStats { private: @@ -147,6 +153,6 @@ struct TimingStats { }; //Write a useful summary of timing information to JSON file -void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats); +void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats); #endif From e70f43c7ef257bbbbd8a53a5e67dbb706cce769d Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 24 Nov 2024 14:05:08 -0500 Subject: [PATCH 21/32] add PlacementContext& arg to copy_locs_to_global_state && use pragma once in header files --- vpr/src/place/place.cpp | 2 +- vpr/src/place/place_log_util.h | 5 +---- vpr/src/place/placer.cpp | 4 +--- vpr/src/place/placer.h | 10 ++++------ 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index c0257d939c0..d1f43af4e05 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -98,7 +98,7 @@ void try_place(const Netlist<>& net_list, free_placement_structs(); - placer.copy_locs_to_global_state(); + placer.copy_locs_to_global_state(place_ctx); } static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index d01d3f4b6dc..c83b3a0f36d 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -1,6 +1,5 @@ -#ifndef VTR_PLACE_LOG_UTIL_H -#define VTR_PLACE_LOG_UTIL_H +#pragma once #include #include @@ -40,5 +39,3 @@ void generate_post_place_timing_reports(const t_placer_opts& 
placer_opts, const PlacementDelayCalculator& delay_calc, bool is_flat, const BlkLocRegistry& blk_loc_registry); - -#endif //VTR_PLACE_LOG_UTIL_H diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 2de33c88791..26f7ca2f756 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -386,9 +386,7 @@ void Placer::place() { log_printer_.print_post_placement_stats(); } -void Placer::copy_locs_to_global_state() { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - +void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) { // the placement location variables should be unlocked before being accessed place_ctx.unlock_loc_vars(); diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index e9be736d4c7..a01791d87a8 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -1,7 +1,5 @@ - -#ifndef VTR_PLACER_H -#define VTR_PLACER_H +#pragma once #include #include @@ -34,9 +32,10 @@ class Placer { void place(); /** - * @brief Copies the placement location variables into the global placement context. + * @brief Copies the placement location variables into the given global placement context. + * @param place_ctx The placement context to which location information will be copied. 
*/ - void copy_locs_to_global_state(); + void copy_locs_to_global_state(PlacementContext& place_ctx); /* * Getters @@ -126,4 +125,3 @@ class Placer { int check_placement_costs_(); }; -#endif //VTR_PLACER_H From 39ad04ebefda8df739fcd3202fafddb3ec1ede7b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 24 Nov 2024 14:25:38 -0500 Subject: [PATCH 22/32] delete free_placement_structs() --- vpr/src/place/place.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d1f43af4e05..f00a2100a76 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -36,8 +36,6 @@ void print_clb_placement(const char* fname); static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, const RRGraphView& rr_graph); -static void free_placement_structs(); - /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -96,8 +94,12 @@ void try_place(const Netlist<>& net_list, placer.place(); - free_placement_structs(); + vtr::release_memory(place_ctx.compressed_block_grids); + /* The placer object has its own copy of block locations and doesn't update + * the global context directly. We need to copy its internal data structures + * to the global placement context before it goes out of scope. + */ placer.copy_locs_to_global_state(place_ctx); } @@ -126,13 +128,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, return cube_bb; } -/* Frees the major structures needed by the placer (and not needed - * elsewhere). */ -static void free_placement_structs() { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - vtr::release_memory(place_ctx.compressed_block_grids); -} - #ifdef VERBOSE void print_clb_placement(const char* fname) { /* Prints out the clb placements to a file. 
*/ From e93d89872351a8c7a56d0c2b185372a47629c26a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 24 Nov 2024 14:43:12 -0500 Subject: [PATCH 23/32] add file comments for placer.h and place_log_util.h --- vpr/src/place/place.h | 6 ++---- vpr/src/place/place_log_util.h | 17 +++++++++++++++++ vpr/src/place/placer.h | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h index 210663823a8..e4a0172ba4e 100644 --- a/vpr/src/place/place.h +++ b/vpr/src/place/place.h @@ -1,5 +1,5 @@ -#ifndef VPR_PLACE_H -#define VPR_PLACE_H + +#pragma once #include "vpr_types.h" @@ -13,5 +13,3 @@ void try_place(const Netlist<>& net_list, std::vector& segment_inf, const std::vector& directs, bool is_flat); - -#endif diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/place_log_util.h index c83b3a0f36d..8c437a922fa 100644 --- a/vpr/src/place/place_log_util.h +++ b/vpr/src/place/place_log_util.h @@ -1,3 +1,20 @@ +/** + * @file placement_log_printer.h + * @brief Declares the PlacementLogPrinter class and associated utilities for logging + * and reporting placement-related statistics and timing analysis results. + * + * This file provides tools to monitor and report the progress and results of the placement stage. + * + * ### Key Components: + * - **PlacementLogPrinter**: + * - A utility class for logging placement status, resource utilization, and swap statistics. + * - Prints detailed statistics during the placement process, including initial and post-placement states. + * - Supports a "quiet mode" to suppress output. + * + * ### Integration: + * The tools in this file integrate with the Placer class to provide information about + * the placement process for debugging, optimization, and analysis purposes. 
+ */ #pragma once diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index a01791d87a8..ede938482f7 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -1,3 +1,19 @@ +/** + * @file placer.h + * @brief Declares the Placer class, which encapsulates the functionality, data structures, + * and algorithms required for the placement stage. + * + * The Placer class initializes necessary objects, performs an initial placement, + * and runs simulated annealing optimization. This optimization minimizes + * wirelength (bounding box) and timing costs to achieve an efficient placement solution. + * + * Key features of the Placer class: + * - Encapsulates all placement-related variables, cost functions, and data structures. + * - Supports optional NoC (Network-on-Chip) cost optimizations if enabled. + * - Interfaces with timing analysis, placement delay calculation. + * - Provides a mechanism for checkpointing the placement state. + * - Includes debugging and validation utilities to verify the correctness of placement. 
+ */ #pragma once From 877fd8eb2c02f050eabca707abcf9e1f7e418efa Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 24 Nov 2024 16:31:11 -0500 Subject: [PATCH 24/32] remove accessor methods from Placer --- vpr/src/place/place_log_util.cpp | 49 ++++++++++++++------------------ vpr/src/place/placer.cpp | 32 --------------------- vpr/src/place/placer.h | 22 +------------- 3 files changed, 23 insertions(+), 80 deletions(-) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index 29ba2b917a7..aa02ed96b40 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -23,7 +23,7 @@ void PlacementLogPrinter::print_place_status_header() const { return; } - const bool noc_enabled = placer_.noc_opts().noc; + const bool noc_enabled = placer_.noc_opts_.noc; VTR_LOG("\n"); if (!noc_enabled) { @@ -52,20 +52,20 @@ void PlacementLogPrinter::print_place_status(float elapsed_sec) const { return; } - const PlacementAnnealer& annealer = placer_.annealer(); + const PlacementAnnealer& annealer = *placer_.annealer_; const t_annealing_state& annealing_state = annealer.get_annealing_state(); const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); const int tot_moves = annealer.get_total_iteration(); - const t_placer_costs& costs = placer_.costs(); - std::shared_ptr timing_info = placer_.timing_info(); + const t_placer_costs& costs = placer_.costs_; + std::shared_ptr timing_info = placer_.timing_info_; - const bool noc_enabled = placer_.noc_opts().noc; - const NocCostTerms& noc_cost_terms = placer_.costs().noc_cost_terms; + const bool noc_enabled = placer_.noc_opts_.noc; + const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms; - const bool is_timing_driven = placer_.placer_opts().place_algorithm.is_timing_driven(); - const float cpd = is_timing_driven ? placer_.critical_path().delay() : std::numeric_limits::quiet_NaN(); - const float sTNS = is_timing_driven ? 
placer_.timing_info()->setup_total_negative_slack() : std::numeric_limits::quiet_NaN(); - const float sWNS = is_timing_driven ? placer_.timing_info()->setup_worst_negative_slack() : std::numeric_limits::quiet_NaN(); + const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven(); + const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits::quiet_NaN(); + const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits::quiet_NaN(); + const float sWNS = is_timing_driven ? placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits::quiet_NaN(); VTR_LOG( "%4zu %6.1f %7.1e " @@ -105,7 +105,7 @@ void PlacementLogPrinter::print_resources_utilization() const { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); - const auto& block_locs = placer_.placer_state().block_locs(); + const auto& block_locs = placer_.placer_state_.block_locs(); size_t max_block_name = 0; size_t max_tile_name = 0; @@ -144,7 +144,7 @@ void PlacementLogPrinter::print_placement_swaps_stats() const { return; } - const PlacementAnnealer& annealer = placer_.annealer(); + const PlacementAnnealer& annealer = *placer_.annealer_; const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); const t_annealing_state& annealing_state = annealer.get_annealing_state(); @@ -170,25 +170,21 @@ void PlacementLogPrinter::print_initial_placement_stats() const { return; } - const t_placer_costs& costs = placer_.costs(); - const t_noc_opts& noc_opts = placer_.noc_opts(); - const t_placer_opts& placer_opts = placer_.placer_opts(); - const tatum::TimingPathInfo& critical_path = placer_.critical_path(); - const std::optional& noc_cost_handler = placer_.noc_cost_handler(); - std::shared_ptr timing_info = placer_.timing_info(); - const PlacerState& placer_state = placer_.placer_state(); + const t_placer_costs& costs = placer_.costs_; + const 
t_placer_opts& placer_opts = placer_.placer_opts_; + std::shared_ptr timing_info = placer_.timing_info_; VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); + if (placer_.noc_opts_.noc) { + VTR_ASSERT(placer_.noc_cost_handler_.has_value()); + placer_.noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs, placer_.noc_opts_); } if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", - 1e9 * critical_path.delay()); + 1e9 * placer_.critical_path_.delay()); VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", 1e9 * timing_info->setup_total_negative_slack()); VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", @@ -198,7 +194,7 @@ void PlacementLogPrinter::print_initial_placement_stats() const { print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); } - const BlkLocRegistry& blk_loc_registry = placer_state.blk_loc_registry(); + const BlkLocRegistry& blk_loc_registry = placer_.placer_state_.blk_loc_registry(); size_t num_macro_members = 0; for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) { num_macro_members += macro.members.size(); @@ -228,12 +224,11 @@ void PlacementLogPrinter::print_post_placement_stats() const { } const auto& timing_ctx = g_vpr_ctx.timing(); - const PlacementAnnealer& annealer = placer_.annealer(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); VTR_LOG("\n"); VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - annealer.get_move_abortion_logger().report_aborted_moves(); + 
placer_.annealer_->get_move_abortion_logger().report_aborted_moves(); if (placer_.placer_opts_.place_algorithm.is_timing_driven()) { //Final timing estimate diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 26f7ca2f756..3a56a4e03e5 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -399,35 +399,3 @@ void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) { get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); #endif } - -const PlacementAnnealer& Placer::annealer() const { - return *annealer_; -} - -const t_placer_opts& Placer::placer_opts() const { - return placer_opts_; -} - -const t_noc_opts& Placer::noc_opts() const { - return noc_opts_; -} - -const t_placer_costs& Placer::costs() const { - return costs_; -} - -const tatum::TimingPathInfo& Placer::critical_path() const { - return critical_path_; -} - -std::shared_ptr Placer::timing_info() const { - return timing_info_; -} - -const PlacerState& Placer::placer_state() const { - return placer_state_; -} - -const std::optional& Placer::noc_cost_handler() const { - return noc_cost_handler_; -} diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index ede938482f7..a6e6f35cf46 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -53,25 +53,6 @@ class Placer { */ void copy_locs_to_global_state(PlacementContext& place_ctx); - /* - * Getters - */ - const PlacementAnnealer& annealer() const; - - const t_placer_opts& placer_opts() const; - - const t_noc_opts& noc_opts() const; - - const t_placer_costs& costs() const; - - const tatum::TimingPathInfo& critical_path() const; - - std::shared_ptr timing_info() const; - - const PlacerState& placer_state() const; - - const std::optional& noc_cost_handler() const; - private: /// Holds placement algorithm parameters const t_placer_opts& placer_opts_; @@ -117,7 +98,7 @@ class Placer { t_timing_analysis_profile_info pre_quench_timing_stats_; t_timing_analysis_profile_info 
post_quench_timing_stats_; - friend void PlacementLogPrinter::print_post_placement_stats() const; + friend class PlacementLogPrinter; private: void alloc_and_init_timing_objects_(const Netlist<>& net_list, @@ -140,4 +121,3 @@ class Placer { */ int check_placement_costs_(); }; - From b760d03da5a87de39b0098bc529fd5112f5a78f4 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 24 Nov 2024 16:38:07 -0500 Subject: [PATCH 25/32] quiet the placement timer if the placer object is quiet --- libs/libvtrutil/src/vtr_time.h | 2 +- vpr/src/place/place.cpp | 5 +---- vpr/src/place/placer.cpp | 2 ++ 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/libs/libvtrutil/src/vtr_time.h b/libs/libvtrutil/src/vtr_time.h index 4e389ef5026..3f187e59288 100644 --- a/libs/libvtrutil/src/vtr_time.h +++ b/libs/libvtrutil/src/vtr_time.h @@ -36,7 +36,7 @@ class Timer { constexpr static float BYTE_TO_MIB = 1024 * 1024; }; -///@brief Scoped time class which prints the time elapsed for the specifid action +///@brief Scoped time class which prints the time elapsed for the specified action class ScopedActionTimer : public Timer { public: ScopedActionTimer(std::string action); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f00a2100a76..86d1f374212 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1,3 +1,4 @@ + #include #include "vtr_assert.h" @@ -47,10 +48,6 @@ void try_place(const Netlist<>& net_list, std::vector& segment_inf, const std::vector& directs, bool is_flat) { - /* Does almost all the work of placing a circuit. Width_fac gives the * - * width of the widest channel. Place_cost_exp says what exponent the * - * width should be taken to when calculating costs. This allows a * - * greater bias for anisotropic architectures. */ /* Currently, the functions that require is_flat as their parameter and are called during placement should * receive is_flat as false. 
For example, if the RR graph of router lookahead is built here, it should be as diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 3a56a4e03e5..12dc5276646 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -51,6 +51,7 @@ Placer::Placer(const Netlist<>& net_list, // Start measuring placement time timer_ = std::make_unique("Placement"); + timer_->quiet(quiet); /* To make sure the importance of NoC-related cost terms compared to * BB and timing cost is determine only through NoC placement weighting factor, @@ -101,6 +102,7 @@ Placer::Placer(const Netlist<>& net_list, } #endif + // width_fac gives the width of the widest channel const int width_fac = placer_opts.place_chan_width; init_draw_coords((float)width_fac, placer_state_.blk_loc_registry()); } From aa53d975fb3be4f18de2c797aedbade434f96621 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 15:47:57 -0500 Subject: [PATCH 26/32] add some comments --- vpr/src/place/place.cpp | 8 ++++++- vpr/src/place/place_log_util.cpp | 1 + vpr/src/place/placer.cpp | 16 ++----------- vpr/src/place/placer.h | 41 +++++++++++++++++++++++++++++++- vpr/src/timing/timing_info.h | 2 +- 5 files changed, 51 insertions(+), 17 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 86d1f374212..b090e46d0a5 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -57,7 +57,8 @@ void try_place(const Netlist<>& net_list, const auto& device_ctx = g_vpr_ctx.device(); /* Placement delay model is independent of the placement and can be shared across - * multiple placers. So, it is created and initialized once. */ + * multiple placers if we are performing parallel annealing. + * So, it is created and initialized once. 
*/ std::shared_ptr place_delay_model; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -84,6 +85,11 @@ void try_place(const Netlist<>& net_list, VTR_LOG("\n"); auto& place_ctx = g_vpr_ctx.mutable_placement(); + + /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the + * placement context. This is done to make sure that the placement stage only accesses its + * own local instances of BlkLocRegistry. + */ place_ctx.lock_loc_vars(); place_ctx.compressed_block_grids = create_compressed_block_grids(); diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/place_log_util.cpp index aa02ed96b40..d825a3af093 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/place_log_util.cpp @@ -165,6 +165,7 @@ void PlacementLogPrinter::print_placement_swaps_stats() const { VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, swap_stats.num_swap_aborted, 100 * abort_rate); } + void PlacementLogPrinter::print_initial_placement_stats() const { if (quiet_) { return; diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 12dc5276646..3ad4d37ddd6 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -79,7 +79,7 @@ Placer::Placer(const Netlist<>& net_list, #ifdef ENABLE_ANALYTIC_PLACE /* - * Analytic Placer: + * Cluster-level Analytic Placer: * Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on * both the clb_netlist and the gird. 
* Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench @@ -284,7 +284,7 @@ void Placer::place() { #endif if (!skip_anneal) { - //Table header + // Table header log_printer_.print_place_status_header(); // Outer loop of the simulated annealing begins @@ -309,12 +309,6 @@ void Placer::place() { log_printer_.print_place_status(temperature_timer.elapsed_sec()); - //#ifdef VERBOSE - // if (getEchoEnabled()) { - // print_clb_placement("first_iteration_clb_placement.echo"); - // } - //#endif - // Outer loop of the simulated annealing ends } while (annealer_->outer_loop_update_state()); } //skip_anneal ends @@ -372,12 +366,6 @@ void Placer::place() { print_place(nullptr, nullptr, filename.c_str(), placer_state_.mutable_block_locs()); } - //#ifdef VERBOSE - // if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { - // print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT)); - // } - //#endif - // Update physical pin values for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { placer_state_.mutable_blk_loc_registry().place_sync_external_block_connections(block_id); diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index a6e6f35cf46..412b0c040a5 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -79,12 +79,17 @@ class Placer { /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later. t_placement_checkpoint placement_checkpoint_; - + /// It holds a setup timing analysis engine. Other placement timing object usually have a reference or pointer to timing_info. std::shared_ptr timing_info_; + /// Post-clustering delay calculator. Its API allows extraction of delay for each timing edge. std::shared_ptr placement_delay_calc_; + /// Stores setup slack of the clustered netlist connections. std::unique_ptr placer_setup_slacks_; + /// Stores criticalities of the clustered netlist connections. 
std::unique_ptr placer_criticalities_; + /// Used to invalidate timing edges corresponding to the pins of moved blocks. std::unique_ptr pin_timing_invalidator_; + /// Stores information about the critical path. This is usually updated after that timing info is updated. tatum::TimingPathInfo critical_path_; std::unique_ptr timer_; @@ -92,15 +97,49 @@ class Placer { IntraLbPbPinLookup pb_gpin_lookup_; ClusteredPinAtomPinsLookup netlist_pin_lookup_; + /// Performs random swaps and implements the simulated annealer optimizer. std::unique_ptr annealer_; + /* These variables store timing analysis profiling information + * at different stages of the placement to be printed at the end + */ t_timing_analysis_profile_info pre_place_timing_stats_; t_timing_analysis_profile_info pre_quench_timing_stats_; t_timing_analysis_profile_info post_quench_timing_stats_; + /* PlacementLogPrinter is made a friend of this class, so it can + * access its private member variables without getter methods. + * PlacementLogPrinter holds a constant reference to an object of type + * Placer to avoid modifying its member variables. + */ friend class PlacementLogPrinter; private: + /** + * @brief Constructs and initializes timing-related objects. + * + * This function performs the following steps to set up timing analysis: + * + * 1. Constructs a `tatum::DelayCalculator` for post-clustering delay calculations. + * This calculator holds a reference to `PlacerTimingContext::connection_delay`, + * which contains net delays based on block locations. + * + * 2. Creates and stores a `SetupTimingInfo` object in `timing_info_`. + * This object utilizes the delay calculator to compute delays on timing edges + * and calculate setup times. + * + * 3. Constructs `PlacerSetupSlacks` and `PlacerCriticalities` objects, + * which translate arrival and required times into slacks and criticalities, + * respectively. + * + * 4. 
Creates a `NetPinTimingInvalidator` object to mark timing edges + * corresponding to the pins of moved blocks as invalid. + * + * 5. Performs a full timing analysis by marking all pins as invalid. + * + * @param net_list The netlist used for iterating over pins. + * @param analysis_opts Analysis options, including whether to echo the timing graph. + */ void alloc_and_init_timing_objects_(const Netlist<>& net_list, const t_analysis_opts& analysis_opts); diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 14d3b08f939..836c95e50d7 100644 --- a/vpr/src/timing/timing_info.h +++ b/vpr/src/timing/timing_info.h @@ -62,7 +62,7 @@ class SetupTimingInfo : public virtual TimingInfo { //Return the critical path with the least slack virtual tatum::TimingPathInfo least_slack_critical_path() const = 0; - //Return the critical path the the longest absolute delay + //Return the critical path with the longest absolute delay virtual tatum::TimingPathInfo longest_critical_path() const = 0; //Return the set of critical paths between all clock domain pairs From 726f3768482768f45eb26a7b32137d9983becb23 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 17:43:47 -0500 Subject: [PATCH 27/32] store pointers to TimingInfo in PlacerSetupSlacks and PlacerCriticalities and NetPinTimingInvalidator --- vpr/src/place/annealer.cpp | 6 ++--- vpr/src/place/place_timing_update.cpp | 6 ++--- vpr/src/place/placer.cpp | 10 +++++--- vpr/src/place/placer.h | 5 ++-- vpr/src/place/timing_place.cpp | 31 ++++++++++++++---------- vpr/src/place/timing_place.h | 23 ++++++++++++------ vpr/src/route/route.cpp | 2 +- vpr/src/route/route_net.h | 2 +- vpr/src/timing/NetPinTimingInvalidator.h | 30 ++++++++++++----------- vpr/src/timing/timing_info.h | 2 +- 10 files changed, 68 insertions(+), 49 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 1d8836956ab..b18f60b27bd 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@
-471,7 +471,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, */ // Invalidates timing of modified connections for incremental timing updates. - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); /* Update the connection_timing_cost and connection_delay * values from the temporary values. */ @@ -532,7 +532,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, /* Invalidates timing of modified connections for incremental * timing updates. These invalidations are accumulated for a * big timing update in the outer loop. */ - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); /* Update the connection_timing_cost and connection_delay * values from the temporary values. */ @@ -588,7 +588,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, /* Re-invalidate the affected sink pins since the proposed * move is rejected, and the same blocks are reverted to * their original positions. 
*/ - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); // Revert the timing update update_timing_classes(crit_params, timing_info_, criticalities_, diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp index d558f386c4b..c9c53b88f90 100644 --- a/vpr/src/place/place_timing_update.cpp +++ b/vpr/src/place/place_timing_update.cpp @@ -45,7 +45,7 @@ void initialize_timing_info(const PlaceCritParams& crit_params, //by passing in all the clb sink pins for (ClusterNetId net_id : clb_nlist.nets()) { for (ClusterPinId pin_id : clb_nlist.net_sinks(net_id)) { - pin_timing_invalidator->invalidate_connection(pin_id, timing_info); + pin_timing_invalidator->invalidate_connection(pin_id); } } @@ -142,10 +142,10 @@ void update_timing_classes(const PlaceCritParams& crit_params, timing_info->update(); /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ - criticalities->update_criticalities(timing_info, crit_params, placer_state); + criticalities->update_criticalities(crit_params, placer_state); /* Update the placer's raw setup slacks. */ - setup_slacks->update_setup_slacks(timing_info); + setup_slacks->update_setup_slacks(); /* Clear invalidation state. 
*/ pin_timing_invalidator->reset(); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 3ad4d37ddd6..ab32b8a6890 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -172,16 +172,20 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, timing_info_ = make_setup_timing_info(placement_delay_calc_, placer_opts_.timing_update_type); - placer_setup_slacks_ = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup_); + placer_setup_slacks_ = std::make_unique(cluster_ctx.clb_nlist, + netlist_pin_lookup_, + timing_info_); - placer_criticalities_ = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup_); + placer_criticalities_ = std::make_unique(cluster_ctx.clb_nlist, + netlist_pin_lookup_, + timing_info_); pin_timing_invalidator_ = make_net_pin_timing_invalidator(placer_opts_.timing_update_type, net_list, netlist_pin_lookup_, atom_ctx.nlist, atom_ctx.lookup, - *timing_info_->timing_graph(), + timing_info_, is_flat_); // First time compute timing and costs, compute from scratch diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 412b0c040a5..66692b8ca1d 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -130,10 +130,11 @@ class Placer { * * 3. Constructs `PlacerSetupSlacks` and `PlacerCriticalities` objects, * which translate arrival and required times into slacks and criticalities, - * respectively. + * respectively. These objects hold pointers to timing_info_. * * 4. Creates a `NetPinTimingInvalidator` object to mark timing edges - * corresponding to the pins of moved blocks as invalid. + * corresponding to the pins of moved blocks as invalid. This object + * holds a pointer to timing_info_. * * 5. Performs a full timing analysis by marking all pins as invalid. 
* diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 021bb6211fb..badd9d1fb61 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -16,9 +16,12 @@ #include "timing_info.h" ///@brief Allocates space for the timing_place_crit_ data structure. -PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) +PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) + , timing_info_(std::move(timing_info)) , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } @@ -32,8 +35,7 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons * * If the criticality exponent has changed, we also need to update from scratch. */ -void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, - const PlaceCritParams& crit_params, +void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params, PlacerState& placer_state) { /* If update is not enabled, exit the routine. 
*/ if (!update_enabled) { @@ -44,7 +46,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf /* Determine what pins need updating */ if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) { - incr_update_criticalities(timing_info); + incr_update_criticalities(); } else { recompute_criticalities(); @@ -63,7 +65,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); // Routing for placement is not flat (at least for the time being) - float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); + float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); float new_crit = pow(clb_pin_crit, crit_params.crit_exponent); /* @@ -114,10 +116,10 @@ void PlacerCriticalities::set_recompute_required() { * atom pin criticalities. */ -void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) { +void PlacerCriticalities::incr_update_criticalities() { cluster_pins_with_modified_criticality_.clear(); - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { + for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); //Some atom pins correspond to connections which are completely @@ -164,9 +166,12 @@ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticali /**************************************/ ///@brief Allocates space for the timing_place_setup_slacks_ data structure. 
-PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) +PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) + , timing_info_(std::move(timing_info)) , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } @@ -180,7 +185,7 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const Cl * In this case, `recompute_required` would be true, and we update all setup slacks * from scratch. */ -void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) { +void PlacerSetupSlacks::update_setup_slacks() { /* If update is not enabled, exit the routine. */ if (!update_enabled) { /* re-computation is required on the next iteration */ @@ -190,7 +195,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) /* Determine what pins need updating */ if (!recompute_required) { - incr_update_setup_slacks(timing_info); + incr_update_setup_slacks(); } else { recompute_setup_slacks(); } @@ -200,7 +205,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); - float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info, pin_lookup_, clb_pin); + float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin); timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; } @@ -217,10 +222,10 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) * Note we use the set of pins reported by the *timing_info* as having modified * setup slacks, rather than those marked as modified by the timing analyzer. 
*/ -void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) { +void PlacerSetupSlacks::incr_update_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) { + for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); //Some atom pins correspond to connections which are completely diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index 852c1aa6297..71e144334ad 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -101,7 +101,9 @@ class PlacerCriticalities { typedef vtr::Range net_range; public: //Lifetime - PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerCriticalities(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info); PlacerCriticalities(const PlacerCriticalities&) = delete; PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; @@ -125,8 +127,7 @@ class PlacerCriticalities { * If out of sync, then the criticalities cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_criticalities(const SetupTimingInfo* timing_info, - const PlaceCritParams& crit_params, + void update_criticalities(const PlaceCritParams& crit_params, PlacerState& placer_state); ///@bried Enable the recompute_required flag to enforce from scratch update. @@ -151,6 +152,9 @@ class PlacerCriticalities { ///@brief The lookup table that maps atom pins to clb pins. const ClusteredPinAtomPinsLookup& pin_lookup_; + ///@brief A pointer to the setup timing analyzer + std::shared_ptr timing_info_; + /** * @brief The matrix that stores criticality value for each connection. 
* @@ -168,7 +172,7 @@ class PlacerCriticalities { vtr::vec_id_set cluster_pins_with_modified_criticality_; ///@brief Incremental update. See timing_place.cpp for more. - void incr_update_criticalities(const SetupTimingInfo* timing_info); + void incr_update_criticalities(); ///@brief Flag that turns on/off the update_criticalities() routine. bool update_enabled = true; @@ -215,7 +219,9 @@ class PlacerSetupSlacks { typedef vtr::Range net_range; public: //Lifetime - PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info); PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; @@ -232,14 +238,14 @@ class PlacerSetupSlacks { public: //Modifiers /** * @brief Updates setup slacks based on the atom netlist setup slacks provided - * by timing_info. + * by timing_info_. * * Should consistently call this method after the most recent timing analysis to * keep the setup slacks stored in this class in sync with the timing analyzer. * If out of sync, then the setup slacks cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_setup_slacks(const SetupTimingInfo* timing_info); + void update_setup_slacks(); ///@bried Enable the recompute_required flag to enforce from scratch update. void set_recompute_required() { recompute_required = true; } @@ -256,6 +262,7 @@ class PlacerSetupSlacks { private: //Data const ClusteredNetlist& clb_nlist_; const ClusteredPinAtomPinsLookup& pin_lookup_; + std::shared_ptr timing_info_; /** * @brief The matrix that stores raw setup slack values for each connection. @@ -268,7 +275,7 @@ class PlacerSetupSlacks { vtr::vec_id_set cluster_pins_with_modified_setup_slack_; ///@brief Incremental update. See timing_place.cpp for more. 
- void incr_update_setup_slacks(const SetupTimingInfo* timing_info); + void incr_update_setup_slacks(); ///@brief Incremental update. See timing_place.cpp for more. void recompute_setup_slacks(); diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index d4dbc2a4d55..08ef1892a49 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -202,7 +202,7 @@ bool route(const Netlist<>& net_list, netlist_pin_lookup, atom_ctx.nlist, atom_ctx.lookup, - *timing_info->timing_graph(), + timing_info, is_flat); std::unique_ptr netlist_router = make_netlist_router( diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h index fcfd5607582..f996be8b64c 100644 --- a/vpr/src/route/route_net.h +++ b/vpr/src/route/route_net.h @@ -93,7 +93,7 @@ inline void update_net_delay_from_isink(float* net_delay, //Delay changed, invalidate for incremental timing update VTR_ASSERT_SAFE(timing_info); ParentPinId pin = net_list.net_pin(inet, isink); - pin_timing_invalidator->invalidate_connection(pin, timing_info); + pin_timing_invalidator->invalidate_connection(pin); } net_delay[isink] = new_delay; diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h index 754d118aef2..c76a075cb74 100644 --- a/vpr/src/timing/NetPinTimingInvalidator.h +++ b/vpr/src/timing/NetPinTimingInvalidator.h @@ -20,7 +20,7 @@ class NetPinTimingInvalidator { typedef vtr::Range tedge_range; virtual ~NetPinTimingInvalidator() = default; virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0; - virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0; + virtual void invalidate_connection(ParentPinId /* pin */) = 0; virtual void reset() = 0; /** @@ -32,12 +32,10 @@ class NetPinTimingInvalidator { * Invalidate all the timing graph edges associated with these connections via * the NetPinTimingInvalidator class. 
*/ - void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) { - VTR_ASSERT_SAFE(timing_info); - + void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected) { // Invalidate timing graph edges affected by the move for (ClusterPinId pin : blocks_affected.affected_pins) { - invalidate_connection(pin, timing_info); + invalidate_connection(pin); } } }; @@ -54,15 +52,17 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, const AtomNetlist& atom_nlist, const AtomLookup& atom_lookup, - const tatum::TimingGraph& timing_graph, - bool is_flat) { + std::shared_ptr timing_info, + bool is_flat) + : timing_info_(std::move(timing_info)) { + size_t num_pins = net_list.pins().size(); pin_first_edge_.reserve(num_pins + 1); //Exact timing_edges_.reserve(num_pins + 1); //Lower bound for (ParentPinId pin_id : net_list.pins()) { pin_first_edge_.push_back(timing_edges_.size()); if (is_flat) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); + tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); if (!tedge) { continue; @@ -73,7 +73,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { auto cluster_pin_id = convert_to_cluster_pin_id(pin_id); auto atom_pins = clb_atom_pin_lookup.connected_atom_pins(cluster_pin_id); for (const AtomPinId atom_pin : atom_pins) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, atom_pin); + tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, atom_pin); if (!tedge) { continue; @@ -101,11 +101,11 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { /** Invalidates all timing edges associated with the clustered netlist connection * 
driving the specified pin. * Is concurrently safe. */ - void invalidate_connection(ParentPinId pin, TimingInfo* timing_info) { + void invalidate_connection(ParentPinId pin) { if (invalidated_pins_.count(pin)) return; //Already invalidated for (tatum::EdgeId edge : pin_timing_edges(pin)) { - timing_info->invalidate_delay(edge); + timing_info_->invalidate_delay(edge); } invalidated_pins_.insert(pin); @@ -146,6 +146,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { } private: + std::shared_ptr timing_info_; std::vector pin_first_edge_; //Indices into timing_edges corresponding std::vector timing_edges_; @@ -167,7 +168,7 @@ class NoopNetPinTimingInvalidator : public NetPinTimingInvalidator { return vtr::make_range((const tatum::EdgeId*)nullptr, (const tatum::EdgeId*)nullptr); } - void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) { + void invalidate_connection(ParentPinId /* pin */) { } void reset() { @@ -181,12 +182,13 @@ inline std::unique_ptr make_net_pin_timing_invalidator( const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, const AtomNetlist& atom_nlist, const AtomLookup& atom_lookup, - const tatum::TimingGraph& timing_graph, + const std::shared_ptr& timing_info, bool is_flat) { if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { return std::make_unique(); } else { VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); - return std::make_unique(net_list, clb_atom_pin_lookup, atom_nlist, atom_lookup, timing_graph, is_flat); + return std::make_unique(net_list, clb_atom_pin_lookup, atom_nlist, + atom_lookup, timing_info, is_flat); } } \ No newline at end of file diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 836c95e50d7..fbd21cbf1bc 100644 --- a/vpr/src/timing/timing_info.h +++ b/vpr/src/timing/timing_info.h @@ -7,7 +7,7 @@ #include "tatum/timing_paths.hpp" #include "timing_util.h" -//Generic inteface which provides functionality 
to update (but not +//Generic interface which provides functionality to update (but not //access) timing information. // //This is useful for algorithms which know they need to update timing From d28d56d5a5128b51b16974648dd73fe5ef289874 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 17:53:30 -0500 Subject: [PATCH 28/32] alloc_lookups_and_delay_model returns a unique_ptr instead of shared_ptr --- vpr/src/place/place_delay_model.cpp | 2 +- vpr/src/place/place_delay_model.h | 2 +- vpr/src/place/placer.cpp | 2 +- vpr/src/place/placer.h | 6 +++++- vpr/src/place/timing_place_lookup.cpp | 10 +++++----- vpr/src/place/timing_place_lookup.h | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index 36070bf8423..4f626a5817f 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -318,7 +318,7 @@ void OverrideDelayModel::write(const std::string& file) const { #endif ///@brief Initialize the placer delay model. -std::shared_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, +std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, const t_placer_opts& placer_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index d1cd3c2164a..0aa01385e6e 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -29,7 +29,7 @@ class PlaceDelayModel; class PlacerState; ///@brief Initialize the placer delay model. 
-std::shared_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, +std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, const t_placer_opts& place_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index ab32b8a6890..409b1954a7b 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -281,7 +281,7 @@ void Placer::place() { bool skip_anneal = false; #ifdef ENABLE_ANALYTIC_PLACE - // When enabled, skip most of the annealing and go straight to quench + // Cluster-level analytic placer: when enabled, skip most of the annealing and go straight to quench if (placer_opts_.enable_analytic_placer) { skip_anneal = true; } diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 66692b8ca1d..3e0462a6a9a 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -79,7 +79,11 @@ class Placer { /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later. t_placement_checkpoint placement_checkpoint_; - /// It holds a setup timing analysis engine. Other placement timing object usually have a reference or pointer to timing_info. + /** + * @brief Holds a setup timing analysis engine. + * Other placement timing objects like PlacerSetupSlacks, PlacerCriticalities, and NetPinTimingInvalidator + * have a pointer to timing_info. A shared pointer is used to manage the lifetime of the object. + */ std::shared_ptr timing_info_; /// Post-clustering delay calculator. Its API allows extraction of delay for each timing edge. 
std::shared_ptr placement_delay_calc_; diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 873633a9c5e..86dc396e2b8 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -170,7 +170,7 @@ static float find_neighboring_average(vtr::NdMatrix& matrix, /******* Globally Accessible Functions **********/ -std::shared_ptr compute_place_delay_model(const t_placer_opts& placer_opts, +std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, const t_router_opts& router_opts, const Netlist<>& net_list, t_det_routing_arch* det_routing_arch, @@ -196,15 +196,15 @@ std::shared_ptr compute_place_delay_model(const t_placer_opts& int longest_length = get_longest_segment_length(segment_inf); /*now setup and compute the actual arrays */ - std::shared_ptr place_delay_model; + std::unique_ptr place_delay_model; float min_cross_layer_delay = get_min_cross_layer_delay(); if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { - place_delay_model = std::make_shared(); + place_delay_model = std::make_unique(); } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { - place_delay_model = std::make_shared(min_cross_layer_delay, is_flat); + place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { - place_delay_model = std::make_shared(min_cross_layer_delay, is_flat); + place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); } else { VTR_ASSERT_MSG(false, "Invalid placer delay model"); } diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h index 14897a7fcc4..fba3f470483 100644 --- a/vpr/src/place/timing_place_lookup.h +++ b/vpr/src/place/timing_place_lookup.h @@ -2,7 +2,7 @@ #define TIMING_PLACE_LOOKUP_H #include "place_delay_model.h" -std::shared_ptr compute_place_delay_model(const t_placer_opts& placer_opts, 
+std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, const t_router_opts& router_opts, const Netlist<>& net_list, t_det_routing_arch* det_routing_arch, From ddfec83d7ae14ea9a9ccbb62aa33bcfa1e29f3fa Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 18:07:12 -0500 Subject: [PATCH 29/32] rename place_log_util to placement_log_printer --- vpr/src/place/{place_log_util.cpp => placement_log_printer.cpp} | 2 +- vpr/src/place/{place_log_util.h => placement_log_printer.h} | 0 vpr/src/place/placer.h | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename vpr/src/place/{place_log_util.cpp => placement_log_printer.cpp} (99%) rename vpr/src/place/{place_log_util.h => placement_log_printer.h} (100%) diff --git a/vpr/src/place/place_log_util.cpp b/vpr/src/place/placement_log_printer.cpp similarity index 99% rename from vpr/src/place/place_log_util.cpp rename to vpr/src/place/placement_log_printer.cpp index d825a3af093..a4eafb3b30f 100644 --- a/vpr/src/place/place_log_util.cpp +++ b/vpr/src/place/placement_log_printer.cpp @@ -1,5 +1,5 @@ -#include "place_log_util.h" +#include "placement_log_printer.h" #include "vtr_log.h" #include "annealer.h" diff --git a/vpr/src/place/place_log_util.h b/vpr/src/place/placement_log_printer.h similarity index 100% rename from vpr/src/place/place_log_util.h rename to vpr/src/place/placement_log_printer.h diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 3e0462a6a9a..e5d23ff567c 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -26,7 +26,7 @@ #include "placer_state.h" #include "noc_place_utils.h" #include "net_cost_handler.h" -#include "place_log_util.h" +#include "placement_log_printer.h" class PlacementAnnealer; namespace vtr{ From 1dcd63ba95e33d0c693d7df970f3dce133858fd8 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 18:09:17 -0500 Subject: [PATCH 30/32] start measuring the placement time from the moment the Placer object is constructed --- 
vpr/src/place/compressed_grid.cpp | 9 +++++++++ vpr/src/place/place.cpp | 5 +++++ vpr/src/place/placer.cpp | 7 +------ vpr/src/place/placer.h | 2 -- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/vpr/src/place/compressed_grid.cpp b/vpr/src/place/compressed_grid.cpp index 0e78e6b99b7..33fa04cbfc1 100644 --- a/vpr/src/place/compressed_grid.cpp +++ b/vpr/src/place/compressed_grid.cpp @@ -1,6 +1,9 @@ + #include "compressed_grid.h" + #include "arch_util.h" #include "globals.h" +#include "vtr_time.h" /** * @brief Creates a compressed grid from the given locations. @@ -16,6 +19,12 @@ static t_compressed_block_grid create_compressed_block_grid(const std::vector create_compressed_block_grids() { + /* Measure how long it takes to allocate and initialize compressed grid. + * The measured execution time is printed when this object goes out of scope + * at the end of this function. + */ + vtr::ScopedStartFinishTimer compressed_grid_timer("Compressed grid construction"); + auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; const int num_layers = grid.get_num_layers(); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index b090e46d0a5..80a3b6edc24 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -93,6 +93,11 @@ void try_place(const Netlist<>& net_list, place_ctx.lock_loc_vars(); place_ctx.compressed_block_grids = create_compressed_block_grids(); + /* Start measuring placement time. The measured execution time will be printed + * when this object goes out of scope at the end of this function. 
+ */ + vtr::ScopedStartFinishTimer placement_timer("Placement"); + Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); placer.place(); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 409b1954a7b..cc3bd20f0c6 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -39,8 +39,7 @@ Placer::Placer(const Netlist<>& net_list, const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); - const auto& timing_ctx = g_vpr_ctx.timing(); - pre_place_timing_stats_ = timing_ctx.stats; + pre_place_timing_stats_ = g_vpr_ctx.timing().stats; init_placement_context(placer_state_.mutable_blk_loc_registry(), directs); @@ -49,10 +48,6 @@ Placer::Placer(const Netlist<>& net_list, noc_cost_handler_.emplace(placer_state_.block_locs()); } - // Start measuring placement time - timer_ = std::make_unique("Placement"); - timer_->quiet(quiet); - /* To make sure the importance of NoC-related cost terms compared to * BB and timing cost is determine only through NoC placement weighting factor, * we normalize NoC-related cost weighting factors so that they add up to 1. diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index e5d23ff567c..dbbe7466e7a 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -96,8 +96,6 @@ class Placer { /// Stores information about the critical path. This is usually updated after that timing info is updated. 
tatum::TimingPathInfo critical_path_; - std::unique_ptr timer_; - IntraLbPbPinLookup pb_gpin_lookup_; ClusteredPinAtomPinsLookup netlist_pin_lookup_; From be433121c0b708f24724276ae425edc509d02744 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 27 Nov 2024 18:18:28 -0500 Subject: [PATCH 31/32] move the construction of pb_gpin_lookup and netlist_pin_lookup to try_place --- vpr/src/place/place.cpp | 10 +++++++++- vpr/src/place/placer.cpp | 11 ++++------- vpr/src/place/placer.h | 9 ++++++--- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 80a3b6edc24..3506d00b801 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -55,6 +55,8 @@ void try_place(const Netlist<>& net_list, */ VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& atom_ctx = g_vpr_ctx.atom(); /* Placement delay model is independent of the placement and can be shared across * multiple placers if we are performing parallel annealing. 
@@ -98,7 +100,13 @@ void try_place(const Netlist<>& net_list, */ vtr::ScopedStartFinishTimer placement_timer("Placement"); - Placer placer(net_list, placer_opts, analysis_opts, noc_opts, directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); + // Enables fast look-up pb graph pins from block pin indices + IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); + // Enables fast look-up of atom pins connect to CLB pins + ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup); + + Placer placer(net_list, placer_opts, analysis_opts, noc_opts, pb_gpin_lookup, netlist_pin_lookup, + directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); placer.place(); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index cc3bd20f0c6..37b48f11d0d 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -20,6 +20,8 @@ Placer::Placer(const Netlist<>& net_list, const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, + const IntraLbPbPinLookup& pb_gpin_lookup, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, const std::vector& directs, std::shared_ptr place_delay_model, bool cube_bb, @@ -28,6 +30,8 @@ Placer::Placer(const Netlist<>& net_list, : placer_opts_(placer_opts) , analysis_opts_(analysis_opts) , noc_opts_(noc_opts) + , pb_gpin_lookup_(pb_gpin_lookup) + , netlist_pin_lookup_(netlist_pin_lookup) , costs_(placer_opts.place_algorithm, noc_opts.noc) , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb) , rng_(placer_opts.seed) @@ -36,8 +40,6 @@ Placer::Placer(const Netlist<>& net_list, , log_printer_(*this, quiet) , is_flat_(is_flat) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& device_ctx = g_vpr_ctx.device(); - const auto& atom_ctx = g_vpr_ctx.atom(); pre_place_timing_stats_ = g_vpr_ctx.timing().stats; @@ -102,11 +104,6 @@ Placer::Placer(const Netlist<>& net_list, 
init_draw_coords((float)width_fac, placer_state_.blk_loc_registry()); } - // Allocate here because it goes into timing critical code where each memory allocation is expensive - pb_gpin_lookup_ = IntraLbPbPinLookup(device_ctx.logical_block_types); - // Enables fast look-up of atom pins connect to CLB pins - netlist_pin_lookup_ = ClusteredPinAtomPinsLookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup_); - // Gets initial cost and loads bounding boxes. costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL); costs_.bb_cost_norm = 1 / costs_.bb_cost; diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index dbbe7466e7a..086630e2c09 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -39,6 +39,8 @@ class Placer { const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, + const IntraLbPbPinLookup& pb_gpin_lookup, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, const std::vector& directs, std::shared_ptr place_delay_model, bool cube_bb, @@ -60,6 +62,10 @@ class Placer { const t_analysis_opts& analysis_opts_; /// Holds NoC-related parameters const t_noc_opts& noc_opts_; + /// Enables fast look-up pb graph pins from block pin indices + const IntraLbPbPinLookup& pb_gpin_lookup_; + /// Enables fast look-up of atom pins connect to CLB pins + const ClusteredPinAtomPinsLookup& netlist_pin_lookup_; /// Placement cost terms with their normalization factors and total cost t_placer_costs costs_; /// Holds timing, runtime, and block location information @@ -96,9 +102,6 @@ class Placer { /// Stores information about the critical path. This is usually updated after that timing info is updated. tatum::TimingPathInfo critical_path_; - IntraLbPbPinLookup pb_gpin_lookup_; - ClusteredPinAtomPinsLookup netlist_pin_lookup_; - /// Performs random swaps and implements the simulated annealer optimizer. 
std::unique_ptr annealer_; From 6905b3ecd0c2b0b99b0a51e76f3977e81cd0ece2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 28 Nov 2024 15:20:20 -0500 Subject: [PATCH 32/32] add more comments --- vpr/src/place/placement_log_printer.h | 50 ++++++++++++++++++++------- vpr/src/place/placer.h | 13 ++++++- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/vpr/src/place/placement_log_printer.h b/vpr/src/place/placement_log_printer.h index 8c437a922fa..d538c20d895 100644 --- a/vpr/src/place/placement_log_printer.h +++ b/vpr/src/place/placement_log_printer.h @@ -2,17 +2,9 @@ * @file placement_log_printer.h * @brief Declares the PlacementLogPrinter class and associated utilities for logging * and reporting placement-related statistics and timing analysis results. - * - * This file provides tools to monitor and report the progress and results of the placement stage. - * - * ### Key Components: - * - **PlacementLogPrinter**: - * - A utility class for logging placement status, resource utilization, and swap statistics. - * - Prints detailed statistics during the placement process, including initial and post-placement states. - * - Supports a "quiet mode" to suppress output. - * + * ### Integration: - * The tools in this file integrate with the Placer class to provide information about + * The PlacementLogPrinter class integrates with the Placer class to provide information about * the placement process for debugging, optimization, and analysis purposes. */ @@ -33,20 +25,54 @@ struct t_swap_stats; class BlkLocRegistry; class Placer; +/** + * @class PlacementLogPrinter + * @brief A utility class for logging placement status and + * updating the screen view when graphics are enabled. + */ class PlacementLogPrinter { public: - explicit PlacementLogPrinter(const Placer& placer, bool quiet); + /** + * @param placer The placer object from which the placement status is retrieved. + * @param quiet When set true, the logger doesn't print any information. 
+ */ + PlacementLogPrinter(const Placer& placer, + bool quiet); + /** + * @brief Prints the placement status header that shows which metrics are reported + * in each iteration of the annealer's outer loop. + * @details This method should be called once before the first call to print_place_status(). + */ void print_place_status_header() const; + + /** + * @brief Print placement metrics and elapsed time after each outer loop iteration of the annealer. + * If graphics are on, the function will also update the screen view. + * @param elapsed_sec Time spent in the latest outer loop iteration. + */ + void print_place_status(float elapsed_sec) const; + + /// Reports the resource utilization for each block type. void print_resources_utilization() const; + /// Reports the number of tried temperatures, total swaps, and how many were accepted or rejected. void print_placement_swaps_stats() const; - void print_place_status(float elapsed_sec) const; + /// Reports placement metrics after the initial placement. void print_initial_placement_stats() const; + /// Prints final placement metrics and generates timing reports. void print_post_placement_stats() const; private: + /** + * @brief A constant reference to the Placer object to access the placement status. + * @details PlacementLogPrinter is a friend class for the Placer class, so it can + * access all its private data members. This reference is made constant to avoid + * any accidental modification of the Placer object. + */ const Placer& placer_; + /// Specifies whether this object prints logs and updates the graphics. const bool quiet_; + /// A string buffer to carry the message to be shown in the graphical interface. 
mutable std::vector msg_; }; diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 086630e2c09..99c00d7e8e5 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -1,7 +1,7 @@ /** * @file placer.h * @brief Declares the Placer class, which encapsulates the functionality, data structures, - * and algorithms required for the placement stage. + * and algorithms required for the (annealing-based) placement stage. * * The Placer class initializes necessary objects, performs an initial placement, * and runs simulated annealing optimization. This optimization minimizes @@ -47,6 +47,17 @@ class Placer { bool is_flat, bool quiet); + /** + * @brief Executes the simulated annealing algorithm to optimize placement. + * + * This function minimizes placement costs, including bounding box and timing costs, + * using simulated annealing. During the process, it periodically updates timing information + * and saves a checkpoint of the best placement encountered. + * + * After the simulated annealing completes, the final placement is evaluated against the + * checkpoint. If the final placement's quality is worse than the checkpoint, the checkpoint + * is restored. The final placement is then validated for legality. + */ void place(); /**