diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp index 85f50e9ac62..f17fba8a752 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/SetupAnalysis.hpp @@ -16,7 +16,7 @@ namespace tatum { /** \file * The 'SetupAnalysis' class defines the operations needed by a GraphWalker class - * to perform a setup (max/longest path) analysis. It satisifes and extends the GraphVisitor + * to perform a setup (max/longest path) analysis. It satisfies and extends the GraphVisitor * concept class. * * Setup Analysis Principles diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp index 510fadd9e51..8eeff58d50b 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.cpp @@ -132,7 +132,7 @@ Time TimingConstraints::setup_constraint(const DomainId src_domain, const Domain return iter->second; } - //If no capture node specific constraint was found, fallback to the domain pair constriant + //If no capture node specific constraint was found, fallback to the domain pair constraint iter = setup_constraints_.find(NodeDomainPair(src_domain, sink_domain, NodeId::INVALID())); if(iter != setup_constraints_.end()) { return iter->second; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp index 07288ed08ba..225ac48f7d5 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingConstraints.hpp @@ -45,7 +45,7 @@ class TimingConstraints { ///\returns The source NodeId of the specified domain NodeId clock_domain_source_node(const DomainId id) const; - //\returns whether the specified domain id corresponds to a virtual lcock + //\returns whether the specified domain id 
corresponds to a virtual clock bool is_virtual_clock(const DomainId id) const; ///\returns The domain of the specified node id if it is a clock source diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp index 70a8bbe2758..7b7f0540891 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisOps.hpp @@ -9,7 +9,7 @@ namespace tatum { namespace detail { * * The operations for CommonAnalysisVisitor to perform setup analysis. * The setup analysis operations define that maximum edge delays are used, and that the - * maixmum arrival time (and minimum required times) are propagated through the timing graph. + * maximum arrival time (and minimum required times) are propagated through the timing graph. * * \see HoldAnalysisOps * \see SetupAnalysisOps diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp index 6b901b21def..82bbd8da30a 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/CommonAnalysisVisitor.hpp @@ -152,10 +152,10 @@ bool CommonAnalysisVisitor<AnalysisOps>::do_arrival_pre_traverse_node(const Timi bool node_constrained = false; if(tc.node_is_constant_generator(node_id)) { - //We progpagate the tags from constant generators to ensure any sinks driven + //We propagate the tags from constant generators to ensure any sinks driven //only by constant generators are recorded as constrained. 
// - //We use a special tag to initialize constant generators which gets overritten + //We use a special tag to initialize constant generators which gets overwritten //by any non-constant tag at downstream nodes TimingTag const_gen_tag = ops_.const_gen_tag(); diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp index 313efa244d7..253b31af5ba 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/SetupAnalysisOps.hpp @@ -8,7 +8,7 @@ namespace tatum { namespace detail { * * The operations for CommonAnalysisVisitor to perform setup analysis. * The setup analysis operations define that maximum edge delays are used, and that the - * maixmum arrival time (and minimum required times) are propagated through the timing graph. + * maximum arrival time (and minimum required times) are propagated through the timing graph. 
* * \see HoldAnalysisOps * \see CommonAnalysisVisitor @@ -121,7 +121,7 @@ class SetupAnalysisOps : public CommonAnalysisOps { Time calculate_slack(const Time required_time, const Time arrival_time) { //Setup requires the arrival to occur *before* the required time, so //slack is the amount of required time left after the arrival time; meaning - //we we subtract the arrival time from the required time to get the setup slack + //we subtract the arrival time from the required time to get the setup slack return required_time - arrival_time; } diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp index 0cbf1a5863b..0104d10d3e3 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/ParallelLevelizedWalker.hpp @@ -11,7 +11,7 @@ namespace tatum { /** - * A parallel timing analyzer which traveres the timing graph in a levelized + * A parallel timing analyzer which traverses the timing graph in a levelized * manner. However nodes within each level are processed in parallel using * Thread Building Blocks (TBB). If TBB is not available it operates serially and is * equivalent to the SerialWalker. 
diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp index 6959659be8b..3b23d7d49e9 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp +++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp @@ -163,7 +163,7 @@ std::vector<RRNodeId> RRSpatialLookup::find_nodes(int layer, nodes.reserve(num_nodes); for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { - nodes.push_back(RRNodeId(node)); + nodes.emplace_back(node); } } diff --git a/libs/libvtrutil/src/vtr_time.h b/libs/libvtrutil/src/vtr_time.h index 4e389ef5026..3f187e59288 100644 --- a/libs/libvtrutil/src/vtr_time.h +++ b/libs/libvtrutil/src/vtr_time.h @@ -36,7 +36,7 @@ class Timer { constexpr static float BYTE_TO_MIB = 1024 * 1024; }; -///@brief Scoped time class which prints the time elapsed for the specifid action +///@brief Scoped time class which prints the time elapsed for the specified action class ScopedActionTimer : public Timer { public: ScopedActionTimer(std::string action); diff --git a/vpr/src/base/clustered_netlist_utils.h b/vpr/src/base/clustered_netlist_utils.h index 52688f88e47..b5d1504ed91 100644 --- a/vpr/src/base/clustered_netlist_utils.h +++ b/vpr/src/base/clustered_netlist_utils.h @@ -14,6 +14,7 @@ class ClusteredPinAtomPinsLookup { typedef typename vtr::Range<atom_pin_iterator> atom_pin_range; public: + ClusteredPinAtomPinsLookup() = default; ClusteredPinAtomPinsLookup(const ClusteredNetlist& clustered_netlist, const AtomNetlist& atom_netlist, const IntraLbPbPinLookup& pb_gpin_lookup); atom_pin_range connected_atom_pins(ClusterPinId clustered_pin) const; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 1641e255b89..78124dd85c3 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1377,7 +1377,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio " * set_nets <int>\n" " Sets the net drawing 
state\n" " * set_cpd <int>\n" - " Sets the criticla path delay drawing state\n" + " Sets the critical path delay drawing state\n" " * set_routing_util <int>\n" " Sets the routing utilization drawing state\n" " * set_clip_routing_util <int>\n" @@ -2256,7 +2256,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument<e_reducer, ParseReducer>(args.place_delay_model_reducer, "--place_delay_model_reducer") - .help("When calculating delta delays for the placment delay model how are multiple values combined?") + .help("When calculating delta delays for the placement delay model how are multiple values combined?") .default_value("min") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index 546bc8b55f3..46bdd750ca9 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -367,7 +367,7 @@ static void initial_setup_NO_PICTURE_to_ROUTING_with_crit_path( } #endif //NO_GRAPHICS -void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<SetupTimingInfo> setup_timing_info) { +void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<const SetupTimingInfo> setup_timing_info) { #ifndef NO_GRAPHICS /* Updates the screen if the user has requested graphics. 
The priority * diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index 2bbd17d077f..355b2891931 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -42,7 +42,7 @@ extern ezgl::application application; #endif /* NO_GRAPHICS */ -void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<SetupTimingInfo> timing_info); +void update_screen(ScreenUpdatePriority priority, const char* msg, enum pic_type pic_on_screen_val, std::shared_ptr<const SetupTimingInfo> timing_info); //FIXME: Currently broken if no rr-graph is loaded /** diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 1d8836956ab..b18f60b27bd 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -471,7 +471,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, */ // Invalidates timing of modified connections for incremental timing updates. - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); /* Update the connection_timing_cost and connection_delay * values from the temporary values. */ @@ -532,7 +532,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, /* Invalidates timing of modified connections for incremental * timing updates. These invalidations are accumulated for a * big timing update in the outer loop. */ - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); /* Update the connection_timing_cost and connection_delay * values from the temporary values. */ @@ -588,7 +588,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, /* Re-invalidate the affected sink pins since the proposed * move is rejected, and the same blocks are reverted to * their original positions. 
*/ - pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_); // Revert the timing update update_timing_classes(crit_params, timing_info_, criticalities_, diff --git a/vpr/src/place/compressed_grid.cpp b/vpr/src/place/compressed_grid.cpp index 0e78e6b99b7..33fa04cbfc1 100644 --- a/vpr/src/place/compressed_grid.cpp +++ b/vpr/src/place/compressed_grid.cpp @@ -1,6 +1,9 @@ + #include "compressed_grid.h" + #include "arch_util.h" #include "globals.h" +#include "vtr_time.h" /** * @brief Creates a compressed grid from the given locations. @@ -16,6 +19,12 @@ static t_compressed_block_grid create_compressed_block_grid(const std::vector<st std::vector<t_compressed_block_grid> create_compressed_block_grids() { + /* Measure how long it takes to allocate and initialize compressed grid. + * The measured execution time is printed when this object goes out of scope + * at the end of this function. 
+ */ + vtr::ScopedStartFinishTimer compressed_grid_timer("Compressed grid construction"); + auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; const int num_layers = grid.get_num_layers(); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 69617b278a2..3506d00b801 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1,59 +1,27 @@ -#include <cstdio> -#include <cmath> + #include <memory> -#include <chrono> -#include <optional> -#include "NetPinTimingInvalidator.h" -#include "clustered_netlist.h" -#include "device_grid.h" -#include "verify_placement.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_util.h" #include "vtr_time.h" -#include "vtr_math.h" - #include "vpr_types.h" -#include "vpr_error.h" #include "vpr_utils.h" #include "globals.h" #include "place.h" #include "annealer.h" -#include "read_place.h" -#include "draw.h" -#include "timing_place.h" #include "read_xml_arch_file.h" #include "echo_files.h" #include "histogram.h" -#include "place_util.h" -#include "analytic_placer.h" -#include "initial_placement.h" #include "place_delay_model.h" -#include "place_timing_update.h" -#include "move_transactions.h" #include "move_utils.h" #include "buttons.h" -#include "PlacementDelayCalculator.h" #include "VprTimingGraphResolver.h" -#include "timing_util.h" -#include "timing_info.h" -#include "concrete_timing_info.h" -#include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" #include "RL_agent_util.h" -#include "place_checkpoint.h" - -#include "clustered_netlist_utils.h" - -#include "noc_place_utils.h" - -#include "net_cost_handler.h" -#include "placer_state.h" - +#include "placer.h" /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE @@ -69,61 +37,6 @@ void print_clb_placement(const char* fname); static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, const RRGraphView& rr_graph); -static NetCostHandler 
alloc_and_load_placement_structs(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::vector<t_direct_inf>& directs, - PlacerState& placer_state, - std::optional<NocCostHandler>& noc_cost_handler); - -static void free_placement_structs(); - -static void check_place(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - const std::optional<NocCostHandler>& noc_cost_handler); - -static int check_placement_costs(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - PlacerState& placer_state, - NetCostHandler& net_cost_handler); - -static int count_connections(); - -static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const PlacementDelayCalculator& delay_calc, - bool is_flat, - const BlkLocRegistry& blk_loc_registry); - -static void print_place_status_header(bool noc_enabled); - -static void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms); - -static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry); - -static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats); - -/** - * @brief Copies the placement location variables into the global placement context. - * @param blk_loc_registry The placement location variables to be copied. 
- */ -static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry); - /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, @@ -135,39 +48,20 @@ void try_place(const Netlist<>& net_list, std::vector<t_segment_inf>& segment_inf, const std::vector<t_direct_inf>& directs, bool is_flat) { - /* Does almost all the work of placing a circuit. Width_fac gives the * - * width of the widest channel. Place_cost_exp says what exponent the * - * width should be taken to when calculating costs. This allows a * - * greater bias for anisotropic architectures. */ /* Currently, the functions that require is_flat as their parameter and are called during placement should * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as * if is_flat is false, even if is_flat is set to true from the command line. */ VTR_ASSERT(!is_flat); - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - auto& timing_ctx = g_vpr_ctx.timing(); - auto pre_place_timing_stats = timing_ctx.stats; - - t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc); - - tatum::TimingPathInfo critical_path; - float sTNS = NAN; - float sWNS = NAN; - - char msg[vtr::bufsize]; - - t_placement_checkpoint placement_checkpoint; + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& atom_ctx = g_vpr_ctx.atom(); - std::shared_ptr<SetupTimingInfo> timing_info; - std::shared_ptr<PlacementDelayCalculator> placement_delay_calc; - std::unique_ptr<PlaceDelayModel> place_delay_model; - std::unique_ptr<PlacerSetupSlacks> placer_setup_slacks; - std::unique_ptr<PlacerCriticalities> placer_criticalities; - std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator; + /* Placement delay model is independent of the placement and can be 
shared across + * multiple placers if we are performing parallel annealing. + * So, it is created and initialized once. */ + std::shared_ptr<PlaceDelayModel> place_delay_model; if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ @@ -192,454 +86,37 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer")); VTR_LOG("\n"); - int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); - - PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb); - auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); - const auto& p_timing_ctx = placer_state.timing(); - const auto& p_runtime_ctx = placer_state.runtime(); - - vtr::RngContainer rng(placer_opts.seed); - - std::optional<NocCostHandler> noc_cost_handler; - // create cost handler objects - NetCostHandler net_cost_handler = alloc_and_load_placement_structs(placer_opts, noc_opts, directs, - placer_state, noc_cost_handler); - -#ifndef NO_GRAPHICS - if (noc_cost_handler.has_value()) { - get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler->get_link_bandwidth_usages()); - } -#endif - - vtr::ScopedStartFinishTimer timer("Placement"); - - if (noc_opts.noc) { - normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts)); - } - - initial_placement(placer_opts, placer_opts.constraints_file.c_str(), - noc_opts, blk_loc_registry, noc_cost_handler, rng); - - //create the move generator based on the chosen strategy - auto [move_generator, move_generator2] = create_move_generators(placer_state, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng); - - if (!placer_opts.write_initial_place_file.empty()) { - print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state.block_locs()); - } + auto& place_ctx = g_vpr_ctx.mutable_placement(); -#ifdef 
ENABLE_ANALYTIC_PLACE - /* - * Analytic Placer: - * Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on - * both the clb_netlist and the gird. - * Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench + /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the + * placement context. This is done to make sure that the placement stage only accesses its + * own local instances of BlkLocRegistry. */ - if (placer_opts.enable_analytic_placer) { - AnalyticPlacer{blk_loc_registry}.ap_place(); - } - -#endif /* ENABLE_ANALYTIC_PLACE */ - - // Update physical pin values - for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { - blk_loc_registry.place_sync_external_block_connections(block_id); - } + place_ctx.lock_loc_vars(); + place_ctx.compressed_block_grids = create_compressed_block_grids(); - const int width_fac = placer_opts.place_chan_width; - init_draw_coords((float)width_fac, blk_loc_registry); + /* Start measuring placement time. The measured execution time will be printed + * when this object goes out of scope at the end of this function. + */ + vtr::ScopedStartFinishTimer placement_timer("Placement"); - /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ + // Enables fast look-up of pb graph pins from block pin indices IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); - //Enables fast look-up of atom pins connect to CLB pins + // Enables fast look-up of atom pins connected to CLB pins ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup); - /* Gets initial cost and loads bounding boxes. 
*/ - - if (placer_opts.place_algorithm.is_timing_driven()) { - costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - - int num_connections = count_connections(); - VTR_LOG("\n"); - VTR_LOG("There are %d point to point connections in this circuit.\n", - num_connections); - VTR_LOG("\n"); - - //Update the point-to-point delays from the initial placement - comp_td_connection_delays(place_delay_model.get(), placer_state); - - /* - * Initialize timing analysis - */ - // For placement, we don't use flat-routing - placement_delay_calc = std::make_shared<PlacementDelayCalculator>(atom_ctx.nlist, - atom_ctx.lookup, - p_timing_ctx.connection_delay, - is_flat); - placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin); - placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin); - - timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); - - placer_setup_slacks = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, netlist_pin_lookup); - - placer_criticalities = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist, netlist_pin_lookup); - - pin_timing_invalidator = make_net_pin_timing_invalidator( - placer_opts.timing_update_type, - net_list, - netlist_pin_lookup, - atom_ctx.nlist, - atom_ctx.lookup, - *timing_info->timing_graph(), - is_flat); - - //First time compute timing and costs, compute from scratch - PlaceCritParams crit_params; - crit_params.crit_exponent = placer_opts.td_place_exp_first; - crit_params.crit_limit = placer_opts.place_crit_limit; - - initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), &costs, placer_state); - - critical_path = timing_info->least_slack_critical_path(); - - /* Write out the initial timing echo file */ - if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo( - 
getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc, timing_info->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); - - write_setup_timing_graph_dot( - getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) - + std::string(".dot"), - *timing_info, debug_tnode); - } - - /* Initialize the normalization factors. Calling costs.update_norm_factors() * - * here would fail the golden results of strong_sdc benchmark */ - costs.timing_cost_norm = 1 / costs.timing_cost; - costs.bb_cost_norm = 1 / costs.bb_cost; - } else { - VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); - - /* Total cost is the same as wirelength cost normalized*/ - costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - costs.bb_cost_norm = 1 / costs.bb_cost; - - /* Timing cost and normalization factors are not used */ - constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN(); - costs.timing_cost = INVALID_COST; - costs.timing_cost_norm = INVALID_COST; - } - - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - - // get the costs associated with the NoC - costs.noc_cost_terms.aggregate_bandwidth = noc_cost_handler->comp_noc_aggregate_bandwidth_cost(); - std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = noc_cost_handler->comp_noc_latency_cost(); - costs.noc_cost_terms.congestion = noc_cost_handler->comp_noc_congestion_cost(); - - // initialize all the noc normalization factors - noc_cost_handler->update_noc_normalization_factors(costs); - } - - // set the starting total placement cost - costs.cost = costs.get_total_cost(placer_opts, noc_opts); - - //Sanity check that initial placement is legal - check_place(costs, - place_delay_model.get(), - placer_criticalities.get(), - placer_opts.place_algorithm, - noc_opts, - placer_state, - net_cost_handler, - 
noc_cost_handler); - - //Initial placement statistics - VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, - costs.bb_cost, costs.timing_cost); - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - - noc_cost_handler->print_noc_costs("Initial NoC Placement Costs", costs, noc_opts); - } - if (placer_opts.place_algorithm.is_timing_driven()) { - VTR_LOG( - "Initial placement estimated Critical Path Delay (CPD): %g ns\n", - 1e9 * critical_path.delay()); - VTR_LOG( - "Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", - 1e9 * timing_info->setup_total_negative_slack()); - VTR_LOG( - "Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", - 1e9 * timing_info->setup_worst_negative_slack()); - VTR_LOG("\n"); - - VTR_LOG("Initial placement estimated setup slack histogram:\n"); - print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); - } - - size_t num_macro_members = 0; - for (auto& macro : blk_loc_registry.place_macros().macros()) { - num_macro_members += macro.members.size(); - } - VTR_LOG( - "Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", - blk_loc_registry.place_macros().macros().size(), num_macro_members, - float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); - VTR_LOG("\n"); - - sprintf(msg, - "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", - costs.cost, costs.bb_cost, costs.timing_cost, width_fac); - - //Draw the initial placement - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); - - if (placer_opts.placement_saves_per_temperature >= 1) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, - 0); - VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); - } - - bool skip_anneal = false; - -#ifdef ENABLE_ANALYTIC_PLACE - // Analytic placer: When enabled, skip most of the annealing and go straight to quench - // TODO: refactor goto label. - if (placer_opts.enable_analytic_placer) { - skip_anneal = true; - } -#endif /* ENABLE_ANALYTIC_PLACE */ - - PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, - noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); - - const t_annealing_state& annealing_state = annealer.get_annealing_state(); - const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); - - if (!skip_anneal) { - //Table header - VTR_LOG("\n"); - print_place_status_header(noc_opts.noc); - - /* Outer loop of the simulated annealing begins */ - do { - vtr::Timer temperature_timer; - - annealer.outer_loop_update_timing_info(); - - if (placer_opts.place_algorithm.is_timing_driven()) { - critical_path = timing_info->least_slack_critical_path(); - sTNS = timing_info->setup_total_negative_slack(); - sWNS = timing_info->setup_worst_negative_slack(); - - // see if we should save the current placement solution as a checkpoint - if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { - 
save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), - placement_checkpoint, - timing_info, costs, critical_path.delay()); - } - } - - // do a complete inner loop iteration - annealer.placement_inner_loop(); - - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), - noc_opts.noc, costs.noc_cost_terms); - - sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", - costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); - update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); - - //#ifdef VERBOSE - // if (getEchoEnabled()) { - // print_clb_placement("first_iteration_clb_placement.echo"); - // } - //#endif - } while (annealer.outer_loop_update_state()); - /* Outer loop of the simulated annealing ends */ - } //skip_anneal ends - - // Start Quench - annealer.start_quench(); - - auto pre_quench_timing_stats = timing_ctx.stats; - { /* Quench */ - - vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - - annealer.outer_loop_update_timing_info(); - - /* Run inner loop again with temperature = 0 so as to accept only swaps - * which reduce the cost of the placement */ - annealer.placement_inner_loop(); - - if (placer_opts.place_quench_algorithm.is_timing_driven()) { - critical_path = timing_info->least_slack_critical_path(); - sTNS = timing_info->setup_total_negative_slack(); - sWNS = timing_info->setup_worst_negative_slack(); - } - - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), - noc_opts.noc, costs.noc_cost_terms); - } - auto post_quench_timing_stats = timing_ctx.stats; - - //Final timing analysis - PlaceCritParams crit_params; - crit_params.crit_exponent = annealing_state.crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - if (placer_opts.place_algorithm.is_timing_driven()) { - 
perform_full_timing_update(crit_params, place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), &costs, placer_state); - VTR_LOG("post-quench CPD = %g (ns) \n", - 1e9 * timing_info->least_slack_critical_path().delay()); - } - - //See if our latest checkpoint is better than the current placement solution - if (placer_opts.place_checkpointing) - restore_best_placement(placer_state, - placement_checkpoint, timing_info, costs, - placer_criticalities, placer_setup_slacks, place_delay_model, - pin_timing_invalidator, crit_params, noc_cost_handler); - - if (placer_opts.placement_saves_per_temperature >= 1) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - annealing_state.num_temps + 1, 0); - VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); - } - - // TODO: - // 1. add some subroutine hierarchy! Too big! 
- - //#ifdef VERBOSE - // if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_END_CLB_PLACEMENT)) { - // print_clb_placement(getEchoFileName(E_ECHO_END_CLB_PLACEMENT)); - // } - //#endif - - // Update physical pin values - for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { - blk_loc_registry.place_sync_external_block_connections(block_id); - } + Placer placer(net_list, placer_opts, analysis_opts, noc_opts, pb_gpin_lookup, netlist_pin_lookup, + directs, place_delay_model, cube_bb, is_flat, /*quiet=*/false); - check_place(costs, - place_delay_model.get(), - placer_criticalities.get(), - placer_opts.place_algorithm, - noc_opts, - placer_state, - net_cost_handler, - noc_cost_handler); + placer.place(); - //Some stats - VTR_LOG("\n"); - VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - annealer.get_move_abortion_logger().report_aborted_moves(); - - if (placer_opts.place_algorithm.is_timing_driven()) { - //Final timing estimate - VTR_ASSERT(timing_info); - - critical_path = timing_info->least_slack_critical_path(); - - if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { - tatum::write_echo( - getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), - *timing_ctx.graph, *timing_ctx.constraints, - *placement_delay_calc, timing_info->analyzer()); - - tatum::NodeId debug_tnode = id_or_pin_name_to_tnode( - analysis_opts.echo_dot_timing_graph_node); - write_setup_timing_graph_dot( - getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) - + std::string(".dot"), - *timing_info, debug_tnode); - } - - generate_post_place_timing_reports(placer_opts, analysis_opts, *timing_info, - *placement_delay_calc, is_flat, blk_loc_registry); - - /* Print critical path delay metrics */ - VTR_LOG("\n"); - print_setup_timing_summary(*timing_ctx.constraints, - *timing_info->setup_analyzer(), "Placement estimated ", ""); - } - - sprintf(msg, - "Placement. 
Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", - costs.cost, costs.bb_cost, costs.timing_cost, width_fac); - VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", costs.cost, - costs.bb_cost, costs.timing_cost); - // print the noc costs info - if (noc_opts.noc) { - VTR_ASSERT(noc_cost_handler.has_value()); - noc_cost_handler->print_noc_costs("\nNoC Placement Costs", costs, noc_opts); - -#ifdef ENABLE_NOC_SAT_ROUTING - if (costs.noc_cost_terms.congestion > 0.0) { - VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); - invoke_sat_router(costs, noc_opts, placer_opts.seed); - } -#endif //ENABLE_NOC_SAT_ROUTING - } - - update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); - // Print out swap statistics - print_resources_utilization(blk_loc_registry); - - print_placement_swaps_stats(annealing_state, swap_stats); - - move_type_stats.print_placement_move_types_stats(); - - if (noc_opts.noc) { - write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); - } - - free_placement_structs(); - - print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); - print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); - - VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", - p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, - p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, - p_runtime_ctx.f_update_td_costs_total_elapsed_sec); - - copy_locs_to_global_state(blk_loc_registry); -} - -/*only count non-global connections */ -static int count_connections() { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - int count = 0; - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - continue; - } - - count += cluster_ctx.clb_nlist.net_sinks(net_id).size(); - } + 
vtr::release_memory(place_ctx.compressed_block_grids); - return count; + /* The placer object has its own copy of block locations and doesn't update + * the global context directly. We need to copy its internal data structures + * to the global placement context before it goes out of scope. + */ + placer.copy_locs_to_global_state(place_ctx); } static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, @@ -667,112 +144,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, return cube_bb; } -/* Allocates the major structures needed only by the placer, primarily for * - * computing costs quickly and such. */ -static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::vector<t_direct_inf>& directs, - PlacerState& placer_state, - std::optional<NocCostHandler>& noc_cost_handler) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - place_ctx.lock_loc_vars(); - - init_placement_context(placer_state.mutable_blk_loc_registry(), directs); - - place_ctx.compressed_block_grids = create_compressed_block_grids(); - - if (noc_opts.noc) { - noc_cost_handler.emplace(placer_state.block_locs()); - } - - return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb}; -} - -/* Frees the major structures needed by the placer (and not needed * - * elsewhere). */ -static void free_placement_structs() { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - vtr::release_memory(place_ctx.compressed_block_grids); -} - -static void check_place(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - const t_noc_opts& noc_opts, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - const std::optional<NocCostHandler>& noc_cost_handler) { - /* Checks that the placement has not confused our data structures. * - * i.e. 
the clb and block structures agree about the locations of * - * every block, blocks are in legal spots, etc. Also recomputes * - * the final placement cost from scratch and makes sure it is * - * within roundoff of what we think the cost is. */ - const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - const DeviceGrid& device_grid = g_vpr_ctx.device().grid; - const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints; - - int error = 0; - - // Verify the placement invariants independent to the placement flow. - error += verify_placement(placer_state.blk_loc_registry(), - clb_nlist, - device_grid, - cluster_constraints); - - error += check_placement_costs(costs, delay_model, criticalities, place_algorithm, placer_state, net_cost_handler); - - if (noc_opts.noc) { - // check the NoC costs during placement if the user is using the NoC supported flow - error += noc_cost_handler->check_noc_placement_costs(costs, PL_INCREMENTAL_COST_TOLERANCE, noc_opts); - // make sure NoC routing configuration does not create any cycles in CDG - error += (int)noc_cost_handler->noc_routing_has_cycle(); - } - - if (error == 0) { - VTR_LOG("\n"); - VTR_LOG("Completed placement consistency check successfully.\n"); - - } else { - VPR_ERROR(VPR_ERROR_PLACE, - "\nCompleted placement consistency check, %d errors found.\n" - "Aborting program.\n", - error); - } -} - -static int check_placement_costs(const t_placer_costs& costs, - const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - const t_place_algorithm& place_algorithm, - PlacerState& placer_state, - NetCostHandler& net_cost_handler) { - int error = 0; - double timing_cost_check; - - double bb_cost_check = net_cost_handler.comp_bb_cost(e_cost_methods::CHECK); - - if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { - VTR_LOG_ERROR( - "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", - bb_cost_check, costs.bb_cost); - error++; - } 
- - if (place_algorithm.is_timing_driven()) { - comp_td_costs(delay_model, *criticalities, placer_state, &timing_cost_check); - //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); - if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { - VTR_LOG_ERROR( - "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", - timing_cost_check, costs.timing_cost); - error++; - } - } - return error; -} - #ifdef VERBOSE void print_clb_placement(const char* fname) { /* Prints out the clb placements to a file. */ @@ -792,27 +163,6 @@ void print_clb_placement(const char* fname) { } #endif -static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const PlacementDelayCalculator& delay_calc, - bool is_flat, - const BlkLocRegistry& blk_loc_registry) { - const auto& timing_ctx = g_vpr_ctx.timing(); - const auto& atom_ctx = g_vpr_ctx.atom(); - - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, - delay_calc, is_flat, blk_loc_registry); - resolver.set_detail_level(analysis_opts.timing_report_detail); - - tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, - *timing_ctx.constraints); - - timing_reporter.report_timing_setup( - placer_opts.post_place_timing_report_file, - *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); -} - #if 0 static void update_screen_debug(); @@ -824,131 +174,3 @@ static void update_screen_debug() { } #endif -static void print_place_status_header(bool noc_enabled) { - if (!noc_enabled) { - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); - VTR_LOG( - "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); - VTR_LOG( - " (sec) (ns) (ns) (ns) \n"); - VTR_LOG( - "---- ------ 
------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); - } else { - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); - VTR_LOG( - "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha Agg. BW Agg. Lat Lat Over. NoC Cong.\n"); - VTR_LOG( - " (sec) (ns) (ns) (ns) (bps) (ns) (ns) \n"); - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); - } -} - -static void print_place_status(const t_annealing_state& state, - const t_placer_statistics& stats, - float elapsed_sec, - float cpd, - float sTNS, - float sWNS, - size_t tot_moves, - bool noc_enabled, - const NocCostTerms& noc_cost_terms) { - VTR_LOG( - "%4zu %6.1f %7.1e " - "%7.3f %10.2f %-10.5g " - "%7.3f % 10.3g % 8.3f " - "%7.3f %7.4f %6.1f %8.2f", - state.num_temps, elapsed_sec, state.t, - stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, - 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, - stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent); - - pretty_print_uint(" ", tot_moves, 9, 3); - - VTR_LOG(" %6.3f", state.alpha); - - if (noc_enabled) { - VTR_LOG( - " %7.2e %7.2e" - " %8.2e %8.2f", - noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency, - noc_cost_terms.latency_overrun, noc_cost_terms.congestion); - } - - VTR_LOG("\n"); - fflush(stdout); -} - -static void print_resources_utilization(const BlkLocRegistry& blk_loc_registry) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& device_ctx = g_vpr_ctx.device(); - const auto& block_locs = blk_loc_registry.block_locs(); - - size_t max_block_name = 0; - size_t max_tile_name = 0; - - //Record the resource requirement - std::map<t_logical_block_type_ptr, size_t> 
num_type_instances; - std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> num_placed_instances; - - for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - const t_pl_loc& loc = block_locs[blk_id].loc; - - t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); - t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - - num_type_instances[logical_block]++; - num_placed_instances[logical_block][physical_tile]++; - - max_block_name = std::max(max_block_name, logical_block->name.length()); - max_tile_name = std::max(max_tile_name, physical_tile->name.length()); - } - - VTR_LOG("\n"); - VTR_LOG("Placement resource usage:\n"); - for (const auto [logical_block_type_ptr, _] : num_type_instances) { - for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) { - VTR_LOG(" %-*s implemented as %-*s: %d\n", max_block_name, - logical_block_type_ptr->name.c_str(), max_tile_name, - physical_tile_type_ptr->name.c_str(), num_instances); - } - } - VTR_LOG("\n"); -} - -static void print_placement_swaps_stats(const t_annealing_state& state, const t_swap_stats& swap_stats) { - size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; - VTR_ASSERT(total_swap_attempts > 0); - - size_t num_swap_print_digits = ceil(log10(total_swap_attempts)); - float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; - float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; - float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; - VTR_LOG("Placement number of temperatures: %d\n", state.num_temps); - VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, - total_swap_attempts); - VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_accepted, 100 * accept_rate); - 
VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_rejected, 100 * reject_rate); - VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, - swap_stats.num_swap_aborted, 100 * abort_rate); -} - -static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - - // the placement location variables should be unlocked before being accessed - place_ctx.unlock_loc_vars(); - - // copy the local location variables into the global state - auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry(); - global_blk_loc_registry = blk_loc_registry; - -#ifndef NO_GRAPHICS - // update the graphics' reference to placement location variables - get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); -#endif -} \ No newline at end of file diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h index 210663823a8..e4a0172ba4e 100644 --- a/vpr/src/place/place.h +++ b/vpr/src/place/place.h @@ -1,5 +1,5 @@ -#ifndef VPR_PLACE_H -#define VPR_PLACE_H + +#pragma once #include "vpr_types.h" @@ -13,5 +13,3 @@ void try_place(const Netlist<>& net_list, std::vector<t_segment_inf>& segment_inf, const std::vector<t_direct_inf>& directs, bool is_flat); - -#endif diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp index 85f4ab28e18..60b009d85ae 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -42,7 +42,7 @@ void restore_best_placement(PlacerState& placer_state, t_placer_costs& costs, std::unique_ptr<PlacerCriticalities>& placer_criticalities, std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks, - std::unique_ptr<PlaceDelayModel>& place_delay_model, + std::shared_ptr<PlaceDelayModel>& place_delay_model, std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator, PlaceCritParams crit_params, std::optional<NocCostHandler>& noc_cost_handler) { diff --git 
a/vpr/src/place/place_checkpoint.h b/vpr/src/place/place_checkpoint.h index 8c2313e7117..9a3fe76d5d8 100644 --- a/vpr/src/place/place_checkpoint.h +++ b/vpr/src/place/place_checkpoint.h @@ -74,7 +74,7 @@ void restore_best_placement(PlacerState& placer_state, t_placer_costs& costs, std::unique_ptr<PlacerCriticalities>& placer_criticalities, std::unique_ptr<PlacerSetupSlacks>& placer_setup_slacks, - std::unique_ptr<PlaceDelayModel>& place_delay_model, + std::shared_ptr<PlaceDelayModel>& place_delay_model, std::unique_ptr<NetPinTimingInvalidator>& pin_timing_invalidator, PlaceCritParams crit_params, std::optional<NocCostHandler>& noc_cost_handler); diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp index d558f386c4b..c9c53b88f90 100644 --- a/vpr/src/place/place_timing_update.cpp +++ b/vpr/src/place/place_timing_update.cpp @@ -45,7 +45,7 @@ void initialize_timing_info(const PlaceCritParams& crit_params, //by passing in all the clb sink pins for (ClusterNetId net_id : clb_nlist.nets()) { for (ClusterPinId pin_id : clb_nlist.net_sinks(net_id)) { - pin_timing_invalidator->invalidate_connection(pin_id, timing_info); + pin_timing_invalidator->invalidate_connection(pin_id); } } @@ -142,10 +142,10 @@ void update_timing_classes(const PlaceCritParams& crit_params, timing_info->update(); /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ - criticalities->update_criticalities(timing_info, crit_params, placer_state); + criticalities->update_criticalities(crit_params, placer_state); /* Update the placer's raw setup slacks. */ - setup_slacks->update_setup_slacks(timing_info); + setup_slacks->update_setup_slacks(); /* Clear invalidation state. 
*/ pin_timing_invalidator->reset(); diff --git a/vpr/src/place/placement_log_printer.cpp b/vpr/src/place/placement_log_printer.cpp new file mode 100644 index 00000000000..a4eafb3b30f --- /dev/null +++ b/vpr/src/place/placement_log_printer.cpp @@ -0,0 +1,320 @@ + +#include "placement_log_printer.h" + +#include "vtr_log.h" +#include "annealer.h" +#include "place_util.h" +#include "PostClusterDelayCalculator.h" +#include "tatum/TimingReporter.hpp" +#include "VprTimingGraphResolver.h" +#include "timing_info.h" +#include "placer.h" +#include "draw.h" +#include "read_place.h" +#include "tatum/echo_writer.hpp" + +PlacementLogPrinter::PlacementLogPrinter(const Placer& placer, bool quiet) + : placer_(placer) + , quiet_(quiet) + , msg_(quiet ? 0 : vtr::bufsize) {} + +void PlacementLogPrinter::print_place_status_header() const { + if (quiet_) { + return; + } + + const bool noc_enabled = placer_.noc_opts_.noc; + + VTR_LOG("\n"); + if (!noc_enabled) { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + } else { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha Agg. BW Agg. Lat Lat Over. 
NoC Cong.\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) (bps) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + } +} + +void PlacementLogPrinter::print_place_status(float elapsed_sec) const { + if (quiet_) { + return; + } + + const PlacementAnnealer& annealer = *placer_.annealer_; + const t_annealing_state& annealing_state = annealer.get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + const int tot_moves = annealer.get_total_iteration(); + const t_placer_costs& costs = placer_.costs_; + std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info_; + + const bool noc_enabled = placer_.noc_opts_.noc; + const NocCostTerms& noc_cost_terms = placer_.costs_.noc_cost_terms; + + const bool is_timing_driven = placer_.placer_opts_.place_algorithm.is_timing_driven(); + const float cpd = is_timing_driven ? placer_.critical_path_.delay() : std::numeric_limits<float>::quiet_NaN(); + const float sTNS = is_timing_driven ? placer_.timing_info_->setup_total_negative_slack() : std::numeric_limits<float>::quiet_NaN(); + const float sWNS = is_timing_driven ? 
placer_.timing_info_->setup_worst_negative_slack() : std::numeric_limits<float>::quiet_NaN(); + + VTR_LOG( + "%4zu %6.1f %7.1e " + "%7.3f %10.2f %-10.5g " + "%7.3f % 10.3g % 8.3f " + "%7.3f %7.4f %6.1f %8.2f", + annealing_state.num_temps, elapsed_sec, annealing_state.t, + placer_stats.av_cost, placer_stats.av_bb_cost, placer_stats.av_timing_cost, + 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, + placer_stats.success_rate, placer_stats.std_dev, annealing_state.rlim, annealing_state.crit_exponent); + + pretty_print_uint(" ", tot_moves, 9, 3); + + VTR_LOG(" %6.3f", annealing_state.alpha); + + if (noc_enabled) { + VTR_LOG( + " %7.2e %7.2e" + " %8.2e %8.2f", + noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency, + noc_cost_terms.latency_overrun, noc_cost_terms.congestion); + } + + VTR_LOG("\n"); + fflush(stdout); + + sprintf(msg_.data(), "Cost: %g BB Cost %g TD Cost %g Temperature: %g", + costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); + + update_screen(ScreenUpdatePriority::MINOR, msg_.data(), PLACEMENT, timing_info); +} + +void PlacementLogPrinter::print_resources_utilization() const { + if (quiet_) { + return; + } + + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& device_ctx = g_vpr_ctx.device(); + const auto& block_locs = placer_.placer_state_.block_locs(); + + size_t max_block_name = 0; + size_t max_tile_name = 0; + + //Record the resource requirement + std::map<t_logical_block_type_ptr, size_t> num_type_instances; + std::map<t_logical_block_type_ptr, std::map<t_physical_tile_type_ptr, size_t>> num_placed_instances; + + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { + const t_pl_loc& loc = block_locs[blk_id].loc; + + t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); + t_logical_block_type_ptr logical_block = cluster_ctx.clb_nlist.block_type(blk_id); + + num_type_instances[logical_block]++; + num_placed_instances[logical_block][physical_tile]++; + + max_block_name = 
std::max(max_block_name, logical_block->name.length()); + max_tile_name = std::max(max_tile_name, physical_tile->name.length()); + } + + VTR_LOG("\n"); + VTR_LOG("Placement resource usage:\n"); + for (const auto [logical_block_type_ptr, _] : num_type_instances) { + for (const auto [physical_tile_type_ptr, num_instances] : num_placed_instances[logical_block_type_ptr]) { + VTR_LOG(" %-*s implemented as %-*s: %d\n", max_block_name, + logical_block_type_ptr->name.c_str(), max_tile_name, + physical_tile_type_ptr->name.c_str(), num_instances); + } + } + VTR_LOG("\n"); +} + +void PlacementLogPrinter::print_placement_swaps_stats() const { + if (quiet_) { + return; + } + + const PlacementAnnealer& annealer = *placer_.annealer_; + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + const t_annealing_state& annealing_state = annealer.get_annealing_state(); + + size_t total_swap_attempts = swap_stats.num_swap_rejected + swap_stats.num_swap_accepted + swap_stats.num_swap_aborted; + VTR_ASSERT(total_swap_attempts > 0); + + size_t num_swap_print_digits = ceil(log10(total_swap_attempts)); + float reject_rate = (float)swap_stats.num_swap_rejected / total_swap_attempts; + float accept_rate = (float)swap_stats.num_swap_accepted / total_swap_attempts; + float abort_rate = (float)swap_stats.num_swap_aborted / total_swap_attempts; + VTR_LOG("Placement number of temperatures: %d\n", annealing_state.num_temps); + VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, + total_swap_attempts); + VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_accepted, 100 * accept_rate); + VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_rejected, 100 * reject_rate); + VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits, + swap_stats.num_swap_aborted, 100 * abort_rate); +} + +void PlacementLogPrinter::print_initial_placement_stats() const { + if (quiet_) { + 
return; + } + + const t_placer_costs& costs = placer_.costs_; + const t_placer_opts& placer_opts = placer_.placer_opts_; + std::shared_ptr<const SetupTimingInfo> timing_info = placer_.timing_info_; + + VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", + costs.cost, costs.bb_cost, costs.timing_cost); + + if (placer_.noc_opts_.noc) { + VTR_ASSERT(placer_.noc_cost_handler_.has_value()); + placer_.noc_cost_handler_->print_noc_costs("Initial NoC Placement Costs", costs, placer_.noc_opts_); + } + + if (placer_opts.place_algorithm.is_timing_driven()) { + VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", + 1e9 * placer_.critical_path_.delay()); + VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", + 1e9 * timing_info->setup_total_negative_slack()); + VTR_LOG("Initial placement estimated setup Worst Negative Slack (sWNS): %g ns\n", + 1e9 * timing_info->setup_worst_negative_slack()); + VTR_LOG("\n"); + VTR_LOG("Initial placement estimated setup slack histogram:\n"); + print_histogram(create_setup_slack_histogram(*timing_info->setup_analyzer())); + } + + const BlkLocRegistry& blk_loc_registry = placer_.placer_state_.blk_loc_registry(); + size_t num_macro_members = 0; + for (const t_pl_macro& macro : blk_loc_registry.place_macros().macros()) { + num_macro_members += macro.members.size(); + } + VTR_LOG("Placement contains %zu placement macros involving %zu blocks (average macro size %f)\n", + blk_loc_registry.place_macros().macros().size(), num_macro_members, + float(num_macro_members) / blk_loc_registry.place_macros().macros().size()); + VTR_LOG("\n"); + + sprintf(msg_.data(), + "Initial Placement. 
Cost: %g BB Cost: %g TD Cost %g \t Channel Factor: %d", + costs.cost, costs.bb_cost, costs.timing_cost, placer_opts.place_chan_width); + + // Draw the initial placement + update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, timing_info); + + if (placer_opts.placement_saves_per_temperature >= 1) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0, 0); + VTR_LOG("Saving initial placement to file: %s\n", filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); + } +} + +void PlacementLogPrinter::print_post_placement_stats() const { + if (quiet_) { + return; + } + + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& [swap_stats, move_type_stats, placer_stats] = placer_.annealer_->get_stats(); + + VTR_LOG("\n"); + VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); + placer_.annealer_->get_move_abortion_logger().report_aborted_moves(); + + if (placer_.placer_opts_.place_algorithm.is_timing_driven()) { + //Final timing estimate + VTR_ASSERT(placer_.timing_info_); + + if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { + tatum::write_echo(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placer_.placement_delay_calc_, placer_.timing_info_->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(placer_.analysis_opts_.echo_dot_timing_graph_node); + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *placer_.timing_info_, debug_tnode); + } + + generate_post_place_timing_reports(placer_.placer_opts_, placer_.analysis_opts_, *placer_.timing_info_, + *placer_.placement_delay_calc_, /*is_flat=*/false, placer_.placer_state_.blk_loc_registry()); + + // Print critical path delay metrics + VTR_LOG("\n"); + print_setup_timing_summary(*timing_ctx.constraints, + *placer_.timing_info_->setup_analyzer(), "Placement estimated ", ""); + } + + sprintf(msg_.data(), + 
"Placement. Cost: %g bb_cost: %g td_cost: %g Channel Factor: %d", + placer_.costs_.cost, placer_.costs_.bb_cost, placer_.costs_.timing_cost, placer_.placer_opts_.place_chan_width); + VTR_LOG("Placement cost: %g, bb_cost: %g, td_cost: %g, \n", placer_.costs_.cost, + placer_.costs_.bb_cost, placer_.costs_.timing_cost); + update_screen(ScreenUpdatePriority::MAJOR, msg_.data(), PLACEMENT, placer_.timing_info_); + + // print the noc costs info + if (placer_.noc_opts_.noc) { + VTR_ASSERT(placer_.noc_cost_handler_.has_value()); + placer_.noc_cost_handler_->print_noc_costs("\nNoC Placement Costs", placer_.costs_, placer_.noc_opts_); + + // TODO: move this to an appropriate file +#ifdef ENABLE_NOC_SAT_ROUTING + if (costs.noc_cost_terms.congestion > 0.0) { + VTR_LOG("NoC routing configuration is congested. Invoking the SAT NoC router.\n"); + invoke_sat_router(costs, noc_opts, placer_opts.seed); + } +#endif //ENABLE_NOC_SAT_ROUTING + } + + // Print out swap statistics and resource utilization + print_resources_utilization(); + print_placement_swaps_stats(); + + move_type_stats.print_placement_move_types_stats(); + + if (placer_.noc_opts_.noc) { + write_noc_placement_file(placer_.noc_opts_.noc_placement_file_name, + placer_.placer_state_.block_locs()); + } + + print_timing_stats("Placement Quench", placer_.post_quench_timing_stats_, placer_.pre_quench_timing_stats_); + print_timing_stats("Placement Total ", timing_ctx.stats, placer_.pre_place_timing_stats_); + + const auto& p_runtime_ctx = placer_.placer_state_.runtime(); + VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", + p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, + p_runtime_ctx.f_update_td_costs_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_sum_nets_elapsed_sec, + p_runtime_ctx.f_update_td_costs_total_elapsed_sec); +} + +void generate_post_place_timing_reports(const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const 
PlacementDelayCalculator& delay_calc, + bool is_flat, + const BlkLocRegistry& blk_loc_registry) { + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& atom_ctx = g_vpr_ctx.atom(); + + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, + delay_calc, is_flat, blk_loc_registry); + resolver.set_detail_level(analysis_opts.timing_report_detail); + + tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, + *timing_ctx.constraints); + + timing_reporter.report_timing_setup( + placer_opts.post_place_timing_report_file, + *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); +} + diff --git a/vpr/src/place/placement_log_printer.h b/vpr/src/place/placement_log_printer.h new file mode 100644 index 00000000000..d538c20d895 --- /dev/null +++ b/vpr/src/place/placement_log_printer.h @@ -0,0 +1,84 @@ +/** + * @file placement_log_printer.h + * @brief Declares the PlacementLogPrinter class and associated utilities for logging + * and reporting placement-related statistics and timing analysis results. + + * ### Integration: + * The PlacementLogPrinter class integrates with the Placer class to provide information about + * the placement process for debugging, optimization, and analysis purposes. + */ + +#pragma once + +#include <cstddef> +#include <vector> + +#include "timing_info_fwd.h" +#include "PlacementDelayCalculator.h" + +class t_annealing_state; +class t_placer_statistics; +struct t_placer_opts; +struct t_analysis_opts; +struct NocCostTerms; +struct t_swap_stats; +class BlkLocRegistry; +class Placer; + +/** + * @class PlacementLogPrinter + * @brief A utility class for logging placement status and + * updating the screen view when graphics are enabled. + */ +class PlacementLogPrinter { + public: + /** + * @param placer The placer object from which the placement status is retrieved. + * @param quiet When set true, the logger doesn't print any information. 
+ */ + PlacementLogPrinter(const Placer& placer, + bool quiet); + + /** + * @brief Prints the placement status header that shows which metrics are reported + * in each iteration of the annealer's outer loop. + * @details This method should be called once before the first call to print_place_status(). + */ + void print_place_status_header() const; + + /** + * @brief Print placement metrics and elapsed time after each outer loop iteration of the annealer. + * If graphics are on, the function will also update the screen view. + * @param elapsed_sec Time spent in the latest outer loop iteration. + */ + void print_place_status(float elapsed_sec) const; + + /// Reports the resource utilization for each block type. + void print_resources_utilization() const; + /// Reports the number of tried temperatures, total swaps, and how many were accepted or rejected. + void print_placement_swaps_stats() const; + /// Reports placement metrics after the initial placement. + void print_initial_placement_stats() const; + /// Prints final placement metrics and generates timing reports. + void print_post_placement_stats() const; + + private: + /** + * @brief A constant reference to the Placer object to access the placement status. + * @details PlacementLogPrinter is a friend class for the Placer class, so it can + * access all its private data members. This reference is made constant to avoid + * any accidental modification of the Placer object. + */ + const Placer& placer_; + /// Specifies whether this object prints logs and updates the graphics. + const bool quiet_; + /// A string buffer to carry the message to be shown in the graphical interface. 
+ mutable std::vector<char> msg_; +}; + +void generate_post_place_timing_reports(const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const PlacementDelayCalculator& delay_calc, + bool is_flat, + const BlkLocRegistry& blk_loc_registry); diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp new file mode 100644 index 00000000000..37b48f11d0d --- /dev/null +++ b/vpr/src/place/placer.cpp @@ -0,0 +1,387 @@ + +#include "placer.h" + +#include <utility> + +#include "vtr_time.h" +#include "draw.h" +#include "read_place.h" +#include "analytic_placer.h" +#include "initial_placement.h" +#include "concrete_timing_info.h" +#include "verify_placement.h" +#include "place_timing_update.h" +#include "annealer.h" +#include "RL_agent_util.h" +#include "place_checkpoint.h" +#include "tatum/echo_writer.hpp" + +Placer::Placer(const Netlist<>& net_list, + const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const t_noc_opts& noc_opts, + const IntraLbPbPinLookup& pb_gpin_lookup, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + const std::vector<t_direct_inf>& directs, + std::shared_ptr<PlaceDelayModel> place_delay_model, + bool cube_bb, + bool is_flat, + bool quiet) + : placer_opts_(placer_opts) + , analysis_opts_(analysis_opts) + , noc_opts_(noc_opts) + , pb_gpin_lookup_(pb_gpin_lookup) + , netlist_pin_lookup_(netlist_pin_lookup) + , costs_(placer_opts.place_algorithm, noc_opts.noc) + , placer_state_(placer_opts.place_algorithm.is_timing_driven(), cube_bb) + , rng_(placer_opts.seed) + , net_cost_handler_(placer_opts, placer_state_, cube_bb) + , place_delay_model_(std::move(place_delay_model)) + , log_printer_(*this, quiet) + , is_flat_(is_flat) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + pre_place_timing_stats_ = g_vpr_ctx.timing().stats; + + init_placement_context(placer_state_.mutable_blk_loc_registry(), directs); + + // create a NoC cost handler if NoC optimization is 
enabled + if (noc_opts.noc) { + noc_cost_handler_.emplace(placer_state_.block_locs()); + } + + /* To make sure the importance of NoC-related cost terms compared to + * BB and timing cost is determined only through NoC placement weighting factor, + * we normalize NoC-related cost weighting factors so that they add up to 1. + * With this normalization, NoC-related cost weighting factors only determine + * the relative importance of NoC cost terms with respect to each other, while + * the importance of total NoC cost to conventional placement cost is determined + * by NoC placement weighting factor. + */ + if (noc_opts.noc) { + normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts)); + } + + BlkLocRegistry& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); + initial_placement(placer_opts, placer_opts.constraints_file.c_str(), + noc_opts, blk_loc_registry, noc_cost_handler_, rng_); + + const int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); + //create the move generator based on the chosen placement strategy + auto [move_generator, move_generator2] = create_move_generators(placer_state_, placer_opts, move_lim, noc_opts.noc_centroid_weight, rng_); + + if (!placer_opts.write_initial_place_file.empty()) { + print_place(nullptr, nullptr, placer_opts.write_initial_place_file.c_str(), placer_state_.block_locs()); + } + +#ifdef ENABLE_ANALYTIC_PLACE + /* + * Cluster-level Analytic Placer: + * Passes in the initial_placement via vpr_context, and passes its placement back via locations marked on + * both the clb_netlist and the grid. 
+ * Most of anneal is disabled later by setting initial temperature to 0 and only further optimizes in quench + */ + if (placer_opts.enable_analytic_placer) { + AnalyticPlacer{blk_loc_registry}.ap_place(); + } + +#endif /* ENABLE_ANALYTIC_PLACE */ + + // Update physical pin values + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + blk_loc_registry.place_sync_external_block_connections(block_id); + } + + if (!quiet) { +#ifndef NO_GRAPHICS + if (noc_cost_handler_.has_value()) { + get_draw_state_vars()->set_noc_link_bandwidth_usages_ref(noc_cost_handler_->get_link_bandwidth_usages()); + } +#endif + + // width_fac gives the width of the widest channel + const int width_fac = placer_opts.place_chan_width; + init_draw_coords((float)width_fac, placer_state_.blk_loc_registry()); + } + + // Gets initial cost and loads bounding boxes. + costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL); + costs_.bb_cost_norm = 1 / costs_.bb_cost; + + if (placer_opts.place_algorithm.is_timing_driven()) { + alloc_and_init_timing_objects_(net_list, analysis_opts); + } else { + VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); + // Timing cost and normalization factors are not used + constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN(); + costs_.timing_cost = INVALID_COST; + costs_.timing_cost_norm = INVALID_COST; + } + + if (noc_opts.noc) { + VTR_ASSERT(noc_cost_handler_.has_value()); + + // get the costs associated with the NoC + costs_.noc_cost_terms.aggregate_bandwidth = noc_cost_handler_->comp_noc_aggregate_bandwidth_cost(); + std::tie(costs_.noc_cost_terms.latency, costs_.noc_cost_terms.latency_overrun) = noc_cost_handler_->comp_noc_latency_cost(); + costs_.noc_cost_terms.congestion = noc_cost_handler_->comp_noc_congestion_cost(); + + // initialize all the noc normalization factors + noc_cost_handler_->update_noc_normalization_factors(costs_); + } + + // set the starting total placement cost + 
costs_.cost = costs_.get_total_cost(placer_opts, noc_opts); + + // Sanity check that initial placement is legal + check_place_(); + + log_printer_.print_initial_placement_stats(); + + annealer_ = std::make_unique<PlacementAnnealer>(placer_opts_, placer_state_, costs_, net_cost_handler_, noc_cost_handler_, + noc_opts_, rng_, std::move(move_generator), std::move(move_generator2), place_delay_model_.get(), + placer_criticalities_.get(), placer_setup_slacks_.get(), timing_info_.get(), pin_timing_invalidator_.get(), + move_lim); +} + +void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, + const t_analysis_opts& analysis_opts) { + const auto& atom_ctx = g_vpr_ctx.atom(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& p_timing_ctx = placer_state_.timing(); + + // Update the point-to-point delays from the initial placement + comp_td_connection_delays(place_delay_model_.get(), placer_state_); + + // Initialize timing analysis + placement_delay_calc_ = std::make_shared<PlacementDelayCalculator>(atom_ctx.nlist, + atom_ctx.lookup, + p_timing_ctx.connection_delay, + is_flat_); + placement_delay_calc_->set_tsu_margin_relative(placer_opts_.tsu_rel_margin); + placement_delay_calc_->set_tsu_margin_absolute(placer_opts_.tsu_abs_margin); + + timing_info_ = make_setup_timing_info(placement_delay_calc_, placer_opts_.timing_update_type); + + placer_setup_slacks_ = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, + netlist_pin_lookup_, + timing_info_); + + placer_criticalities_ = std::make_unique<PlacerCriticalities>(cluster_ctx.clb_nlist, + netlist_pin_lookup_, + timing_info_); + + pin_timing_invalidator_ = make_net_pin_timing_invalidator(placer_opts_.timing_update_type, + net_list, + netlist_pin_lookup_, + atom_ctx.nlist, + atom_ctx.lookup, + timing_info_, + is_flat_); + + // First time compute timing and costs, compute from scratch + PlaceCritParams crit_params; + crit_params.crit_exponent = 
placer_opts_.td_place_exp_first; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + initialize_timing_info(crit_params, place_delay_model_.get(), placer_criticalities_.get(), + placer_setup_slacks_.get(), pin_timing_invalidator_.get(), + timing_info_.get(), &costs_, placer_state_); + + critical_path_ = timing_info_->least_slack_critical_path(); + + // Write out the initial timing echo file + if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) { + tatum::write_echo(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH), + *timing_ctx.graph, *timing_ctx.constraints, + *placement_delay_calc_, timing_info_->analyzer()); + + tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node); + + write_setup_timing_graph_dot(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), + *timing_info_, debug_tnode); + } + + costs_.timing_cost_norm = 1 / costs_.timing_cost; +} + +void Placer::check_place_() { + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + const DeviceGrid& device_grid = g_vpr_ctx.device().grid; + const auto& cluster_constraints = g_vpr_ctx.floorplanning().cluster_constraints; + + int error = 0; + + // Verify the placement invariants independent to the placement flow. 
+ error += verify_placement(placer_state_.blk_loc_registry(), + clb_nlist, + device_grid, + cluster_constraints); + + error += check_placement_costs_(); + + if (noc_opts_.noc) { + // check the NoC costs during placement if the user is using the NoC supported flow + error += noc_cost_handler_->check_noc_placement_costs(costs_, PL_INCREMENTAL_COST_TOLERANCE, noc_opts_); + // make sure NoC routing configuration does not create any cycles in CDG + error += (int)noc_cost_handler_->noc_routing_has_cycle(); + } + + if (error == 0) { + VTR_LOG("\n"); + VTR_LOG("Completed placement consistency check successfully.\n"); + + } else { + VPR_ERROR(VPR_ERROR_PLACE, + "\nCompleted placement consistency check, %d errors found.\n" + "Aborting program.\n", + error); + } +} + +int Placer::check_placement_costs_() { + int error = 0; + double timing_cost_check; + + double bb_cost_check = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK); + + if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { + VTR_LOG_ERROR( + "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", + bb_cost_check, costs_.bb_cost); + error++; + } + + if (placer_opts_.place_algorithm.is_timing_driven()) { + comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check); + //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); + if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { + VTR_LOG_ERROR( + "timing_cost_check: %g and timing_cost: %g differ in check_place.\n", + timing_cost_check, costs_.timing_cost); + error++; + } + } + return error; +} + +void Placer::place() { + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + + bool skip_anneal = false; +#ifdef ENABLE_ANALYTIC_PLACE + // Cluster-level analytic placer: when enabled, skip most of the annealing and go straight to quench + if 
(placer_opts_.enable_analytic_placer) { + skip_anneal = true; + } +#endif + + if (!skip_anneal) { + // Table header + log_printer_.print_place_status_header(); + + // Outer loop of the simulated annealing begins + do { + vtr::Timer temperature_timer; + + annealer_->outer_loop_update_timing_info(); + + if (placer_opts_.place_algorithm.is_timing_driven()) { + critical_path_ = timing_info_->least_slack_critical_path(); + + // see if we should save the current placement solution as a checkpoint + if (placer_opts_.place_checkpointing && annealer_->get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { + save_placement_checkpoint_if_needed(placer_state_.mutable_block_locs(), + placement_checkpoint_, + timing_info_, costs_, critical_path_.delay()); + } + } + + // do a complete inner loop iteration + annealer_->placement_inner_loop(); + + log_printer_.print_place_status(temperature_timer.elapsed_sec()); + + // Outer loop of the simulated annealing ends + } while (annealer_->outer_loop_update_state()); + } //skip_anneal ends + + // Start Quench + annealer_->start_quench(); + + pre_quench_timing_stats_ = timing_ctx.stats; + { // Quench + vtr::ScopedFinishTimer temperature_timer("Placement Quench"); + + annealer_->outer_loop_update_timing_info(); + + /* Run inner loop again with temperature = 0 so as to accept only swaps + * which reduce the cost of the placement */ + annealer_->placement_inner_loop(); + + if (placer_opts_.place_quench_algorithm.is_timing_driven()) { + critical_path_ = timing_info_->least_slack_critical_path(); + } + + log_printer_.print_place_status(temperature_timer.elapsed_sec()); + } + post_quench_timing_stats_ = timing_ctx.stats; + + // Final timing analysis + const t_annealing_state& annealing_state = annealer_->get_annealing_state(); + PlaceCritParams crit_params; + crit_params.crit_exponent = annealing_state.crit_exponent; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + if (placer_opts_.place_algorithm.is_timing_driven()) { + 
perform_full_timing_update(crit_params, place_delay_model_.get(), placer_criticalities_.get(), + placer_setup_slacks_.get(), pin_timing_invalidator_.get(), + timing_info_.get(), &costs_, placer_state_); + + critical_path_ = timing_info_->least_slack_critical_path(); + + VTR_LOG("post-quench CPD = %g (ns) \n", + 1e9 * critical_path_.delay()); + } + + // See if our latest checkpoint is better than the current placement solution + if (placer_opts_.place_checkpointing) { + restore_best_placement(placer_state_, + placement_checkpoint_, timing_info_, costs_, + placer_criticalities_, placer_setup_slacks_, place_delay_model_, + pin_timing_invalidator_, crit_params, noc_cost_handler_); + } + + if (placer_opts_.placement_saves_per_temperature >= 1) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", + annealing_state.num_temps + 1, 0); + VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), placer_state_.mutable_block_locs()); + } + + // Update physical pin values + for (const ClusterBlockId block_id : cluster_ctx.clb_nlist.blocks()) { + placer_state_.mutable_blk_loc_registry().place_sync_external_block_connections(block_id); + } + + check_place_(); + + log_printer_.print_post_placement_stats(); +} + +void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) { + // the placement location variables should be unlocked before being accessed + place_ctx.unlock_loc_vars(); + + // copy the local location variables into the global state + auto& global_blk_loc_registry = place_ctx.mutable_blk_loc_registry(); + global_blk_loc_registry = placer_state_.blk_loc_registry(); + +#ifndef NO_GRAPHICS + // update the graphics' reference to placement location variables + get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); +#endif +} diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h new file mode 100644 index 00000000000..99c00d7e8e5 --- /dev/null +++ 
b/vpr/src/place/placer.h @@ -0,0 +1,179 @@ +/** + * @file placer.h + * @brief Declares the Placer class, which encapsulates the functionality, data structures, + * and algorithms required for the (annealing-based) placement stage + * + * The Placer class initializes necessary objects, performs an initial placement, + * and runs simulated annealing optimization. This optimization minimizes + * wirelength (bounding box) and timing costs to achieve an efficient placement solution. + * + * Key features of the Placer class: + * - Encapsulates all placement-related variables, cost functions, and data structures. + * - Supports optional NoC (Network-on-Chip) cost optimizations if enabled. + * - Interfaces with timing analysis, placement delay calculation. + * - Provides a mechanism for checkpointing the placement state. + * - Includes debugging and validation utilities to verify the correctness of placement. + */ + +#pragma once + +#include <memory> +#include <optional> + +#include "timing_place.h" +#include "place_checkpoint.h" +#include "PlacementDelayCalculator.h" +#include "placer_state.h" +#include "noc_place_utils.h" +#include "net_cost_handler.h" +#include "placement_log_printer.h" + +class PlacementAnnealer; +namespace vtr{ +class ScopedStartFinishTimer; +} + +class Placer { + public: + Placer(const Netlist<>& net_list, + const t_placer_opts& placer_opts, + const t_analysis_opts& analysis_opts, + const t_noc_opts& noc_opts, + const IntraLbPbPinLookup& pb_gpin_lookup, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + const std::vector<t_direct_inf>& directs, + std::shared_ptr<PlaceDelayModel> place_delay_model, + bool cube_bb, + bool is_flat, + bool quiet); + + /** + * @brief Executes the simulated annealing algorithm to optimize placement. + * + * This function minimizes placement costs, including bounding box and timing costs, + * using simulated annealing. 
During the process, it periodically updates timing information + * and saves a checkpoint of the best placement encountered. + * + * After the simulated annealing completes, the final placement is evaluated against the + * checkpoint. If the final placement's quality is worse than the checkpoint, the checkpoint + * is restored. The final placement is then validated for legality. + */ + void place(); + + /** + * @brief Copies the placement location variables into the given global placement context. + * @param place_ctx The placement context to which location information will be copied. + */ + void copy_locs_to_global_state(PlacementContext& place_ctx); + + private: + /// Holds placement algorithm parameters + const t_placer_opts& placer_opts_; + /// Holds timing analysis parameters + const t_analysis_opts& analysis_opts_; + /// Holds NoC-related parameters + const t_noc_opts& noc_opts_; + /// Enables fast look-up of pb graph pins from block pin indices + const IntraLbPbPinLookup& pb_gpin_lookup_; + /// Enables fast look-up of atom pins connected to CLB pins + const ClusteredPinAtomPinsLookup& netlist_pin_lookup_; + /// Placement cost terms with their normalization factors and total cost + t_placer_costs costs_; + /// Holds timing, runtime, and block location information + PlacerState placer_state_; + /// Random number generator used to select random blocks and locations + vtr::RngContainer rng_; + /// Computes and updates net bounding box cost + NetCostHandler net_cost_handler_; + /// Computes and updates NoC-related cost terms if NoC optimization is enabled + std::optional<NocCostHandler> noc_cost_handler_; + /// A delay model shared between multiple instances of this class. + std::shared_ptr<PlaceDelayModel> place_delay_model_; + /// Prints logs during placement + const PlacementLogPrinter log_printer_; + /// Indicates if flat routing resource graph and delay model is used. It should be false. 
+ const bool is_flat_; + + /// Stores a placement state as a retrievable checkpoint in case the placement quality deteriorates later. + t_placement_checkpoint placement_checkpoint_; + /** + * @brief Holds a setup timing analysis engine. + * Other placement timing objects like PlacerSetupSlacks, PlacerCriticalities, and NetPinTimingInvalidator + * have a pointer to timing_info. A shared pointer is used to manage the lifetime of the object. + */ + std::shared_ptr<SetupTimingInfo> timing_info_; + /// Post-clustering delay calculator. Its API allows extraction of delay for each timing edge. + std::shared_ptr<PlacementDelayCalculator> placement_delay_calc_; + /// Stores setup slack of the clustered netlist connections. + std::unique_ptr<PlacerSetupSlacks> placer_setup_slacks_; + /// Stores criticalities of the clustered netlist connections. + std::unique_ptr<PlacerCriticalities> placer_criticalities_; + /// Used to invalidate timing edges corresponding to the pins of moved blocks. + std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator_; + /// Stores information about the critical path. This is usually updated after that timing info is updated. + tatum::TimingPathInfo critical_path_; + + /// Performs random swaps and implements the simulated annealer optimizer. + std::unique_ptr<PlacementAnnealer> annealer_; + + /* These variables store timing analysis profiling information + * at different stages of the placement to be printed at the end + */ + t_timing_analysis_profile_info pre_place_timing_stats_; + t_timing_analysis_profile_info pre_quench_timing_stats_; + t_timing_analysis_profile_info post_quench_timing_stats_; + + /* PlacementLogPrinter is made a friend of this class, so it can + * access its private member variables without getter methods. + * PlacementLogPrinter holds a constant reference to an object of type + * Placer to avoid modifying its member variables. 
+ */ + friend class PlacementLogPrinter; + + private: + /** + * @brief Constructs and initializes timing-related objects. + * + * This function performs the following steps to set up timing analysis: + * + * 1. Constructs a `tatum::DelayCalculator` for post-clustering delay calculations. + * This calculator holds a reference to `PlacerTimingContext::connection_delay`, + * which contains net delays based on block locations. + * + * 2. Creates and stores a `SetupTimingInfo` object in `timing_info_`. + * This object utilizes the delay calculator to compute delays on timing edges + * and calculate setup times. + * + * 3. Constructs `PlacerSetupSlacks` and `PlacerCriticalities` objects, + * which translate arrival and required times into slacks and criticalities, + * respectively. These objects hold pointers to timing_info_. + * + * 4. Creates a `NetPinTimingInvalidator` object to mark timing edges + * corresponding to the pins of moved blocks as invalid. This object + * holds a pointer to timing_info_. + * + * 5. Performs a full timing analysis by marking all pins as invalid. + * + * @param net_list The netlist used for iterating over pins. + * @param analysis_opts Analysis options, including whether to echo the timing graph. + */ + void alloc_and_init_timing_objects_(const Netlist<>& net_list, + const t_analysis_opts& analysis_opts); + + /** + * Checks that the placement has not confused our data structures. + * i.e. the clb and block structures agree about the locations of + * every block, blocks are in legal spots, etc. Also recomputes + * the final placement cost from scratch and makes sure it is + * within round-off of what we think the cost is. + */ + void check_place_(); + + /** + * Computes bounding box and timing cost to ensure it is + * within a small error margin of what we think the cost is. + * @return Number of cost elements, i.e. BB and timing, that fall + * outside the acceptable round-off error margin. 
+ */ + int check_placement_costs_(); +}; diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 021bb6211fb..badd9d1fb61 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -16,9 +16,12 @@ #include "timing_info.h" ///@brief Allocates space for the timing_place_crit_ data structure. -PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) +PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr<const SetupTimingInfo> timing_info) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) + , timing_info_(std::move(timing_info)) , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) { } @@ -32,8 +35,7 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons * * If the criticality exponent has changed, we also need to update from scratch. */ -void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, - const PlaceCritParams& crit_params, +void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params, PlacerState& placer_state) { /* If update is not enabled, exit the routine. 
*/ if (!update_enabled) { @@ -44,7 +46,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf /* Determine what pins need updating */ if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) { - incr_update_criticalities(timing_info); + incr_update_criticalities(); } else { recompute_criticalities(); @@ -63,7 +65,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); // Routing for placement is not flat (at least for the time being) - float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); + float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); float new_crit = pow(clb_pin_crit, crit_params.crit_exponent); /* @@ -114,10 +116,10 @@ void PlacerCriticalities::set_recompute_required() { * atom pin criticalities. */ -void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) { +void PlacerCriticalities::incr_update_criticalities() { cluster_pins_with_modified_criticality_.clear(); - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { + for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); //Some atom pins correspond to connections which are completely @@ -164,9 +166,12 @@ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticali /**************************************/ ///@brief Allocates space for the timing_place_setup_slacks_ data structure. 
-PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) +PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr<const SetupTimingInfo> timing_info) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) + , timing_info_(std::move(timing_info)) , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) { } @@ -180,7 +185,7 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const Cl * In this case, `recompute_required` would be true, and we update all setup slacks * from scratch. */ -void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) { +void PlacerSetupSlacks::update_setup_slacks() { /* If update is not enabled, exit the routine. */ if (!update_enabled) { /* re-computation is required on the next iteration */ @@ -190,7 +195,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) /* Determine what pins need updating */ if (!recompute_required) { - incr_update_setup_slacks(timing_info); + incr_update_setup_slacks(); } else { recompute_setup_slacks(); } @@ -200,7 +205,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); - float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info, pin_lookup_, clb_pin); + float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin); timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; } @@ -217,10 +222,10 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) * Note we use the set of pins reported by the *timing_info* as having modified * setup slacks, rather than those marked as modified by the timing 
analyzer. */ -void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) { +void PlacerSetupSlacks::incr_update_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) { + for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); //Some atom pins correspond to connections which are completely diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index 852c1aa6297..71e144334ad 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -101,7 +101,9 @@ class PlacerCriticalities { typedef vtr::Range<net_iterator> net_range; public: //Lifetime - PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerCriticalities(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr<const SetupTimingInfo> timing_info); PlacerCriticalities(const PlacerCriticalities&) = delete; PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; @@ -125,8 +127,7 @@ class PlacerCriticalities { * If out of sync, then the criticalities cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_criticalities(const SetupTimingInfo* timing_info, - const PlaceCritParams& crit_params, + void update_criticalities(const PlaceCritParams& crit_params, PlacerState& placer_state); ///@bried Enable the recompute_required flag to enforce from scratch update. @@ -151,6 +152,9 @@ class PlacerCriticalities { ///@brief The lookup table that maps atom pins to clb pins. const ClusteredPinAtomPinsLookup& pin_lookup_; + ///@brief A pointer to the setup timing analyzer + std::shared_ptr<const SetupTimingInfo> timing_info_; + /** * @brief The matrix that stores criticality value for each connection. 
* @@ -168,7 +172,7 @@ class PlacerCriticalities { vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_criticality_; ///@brief Incremental update. See timing_place.cpp for more. - void incr_update_criticalities(const SetupTimingInfo* timing_info); + void incr_update_criticalities(); ///@brief Flag that turns on/off the update_criticalities() routine. bool update_enabled = true; @@ -215,7 +219,9 @@ class PlacerSetupSlacks { typedef vtr::Range<net_iterator> net_range; public: //Lifetime - PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr<const SetupTimingInfo> timing_info); PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; @@ -232,14 +238,14 @@ class PlacerSetupSlacks { public: //Modifiers /** * @brief Updates setup slacks based on the atom netlist setup slacks provided - * by timing_info. + * by timing_info_. * * Should consistently call this method after the most recent timing analysis to * keep the setup slacks stored in this class in sync with the timing analyzer. * If out of sync, then the setup slacks cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_setup_slacks(const SetupTimingInfo* timing_info); + void update_setup_slacks(); ///@bried Enable the recompute_required flag to enforce from scratch update. void set_recompute_required() { recompute_required = true; } @@ -256,6 +262,7 @@ class PlacerSetupSlacks { private: //Data const ClusteredNetlist& clb_nlist_; const ClusteredPinAtomPinsLookup& pin_lookup_; + std::shared_ptr<const SetupTimingInfo> timing_info_; /** * @brief The matrix that stores raw setup slack values for each connection. 
@@ -268,7 +275,7 @@ class PlacerSetupSlacks { vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_setup_slack_; ///@brief Incremental update. See timing_place.cpp for more. - void incr_update_setup_slacks(const SetupTimingInfo* timing_info); + void incr_update_setup_slacks(); ///@brief Incremental update. See timing_place.cpp for more. void recompute_setup_slacks(); diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index d4dbc2a4d55..08ef1892a49 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -202,7 +202,7 @@ bool route(const Netlist<>& net_list, netlist_pin_lookup, atom_ctx.nlist, atom_ctx.lookup, - *timing_info->timing_graph(), + timing_info, is_flat); std::unique_ptr<NetlistRouter> netlist_router = make_netlist_router( diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h index fcfd5607582..f996be8b64c 100644 --- a/vpr/src/route/route_net.h +++ b/vpr/src/route/route_net.h @@ -93,7 +93,7 @@ inline void update_net_delay_from_isink(float* net_delay, //Delay changed, invalidate for incremental timing update VTR_ASSERT_SAFE(timing_info); ParentPinId pin = net_list.net_pin(inet, isink); - pin_timing_invalidator->invalidate_connection(pin, timing_info); + pin_timing_invalidator->invalidate_connection(pin); } net_delay[isink] = new_delay; diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 88cd75d46c8..a3d468b901c 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -507,12 +507,12 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm auto& grid = device_ctx.grid; //Re-allocate - f_wire_cost_map = t_wire_cost_map({static_cast<unsigned long>(grid.get_num_layers()), - static_cast<unsigned long>(grid.get_num_layers()), - 2, - segment_inf_vec.size(), - device_ctx.grid.width(), - device_ctx.grid.height()}); + f_wire_cost_map = t_wire_cost_map({static_cast<unsigned long>(grid.get_num_layers()), + 
static_cast<unsigned long>(grid.get_num_layers()), + 2, + segment_inf_vec.size(), + device_ctx.grid.width(), + device_ctx.grid.height()}); int longest_seg_length = 0; for (const auto& seg_inf : segment_inf_vec) { @@ -536,7 +536,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm chan_type, segment_inf, std::unordered_map<int, std::unordered_set<int>>(), - true); + /*sample_all_locs=*/true); if (routing_cost_map.empty()) { continue; } diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h index 754d118aef2..c76a075cb74 100644 --- a/vpr/src/timing/NetPinTimingInvalidator.h +++ b/vpr/src/timing/NetPinTimingInvalidator.h @@ -20,7 +20,7 @@ class NetPinTimingInvalidator { typedef vtr::Range<const tatum::EdgeId*> tedge_range; virtual ~NetPinTimingInvalidator() = default; virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0; - virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0; + virtual void invalidate_connection(ParentPinId /* pin */) = 0; virtual void reset() = 0; /** @@ -32,12 +32,10 @@ class NetPinTimingInvalidator { * Invalidate all the timing graph edges associated with these connections via * the NetPinTimingInvalidator class. 
*/ - void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) { - VTR_ASSERT_SAFE(timing_info); - + void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected) { // Invalidate timing graph edges affected by the move for (ClusterPinId pin : blocks_affected.affected_pins) { - invalidate_connection(pin, timing_info); + invalidate_connection(pin); } } }; @@ -54,15 +52,17 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, const AtomNetlist& atom_nlist, const AtomLookup& atom_lookup, - const tatum::TimingGraph& timing_graph, - bool is_flat) { + std::shared_ptr<TimingInfo> timing_info, + bool is_flat) + : timing_info_(std::move(timing_info)) { + size_t num_pins = net_list.pins().size(); pin_first_edge_.reserve(num_pins + 1); //Exact timing_edges_.reserve(num_pins + 1); //Lower bound for (ParentPinId pin_id : net_list.pins()) { pin_first_edge_.push_back(timing_edges_.size()); if (is_flat) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); + tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); if (!tedge) { continue; @@ -73,7 +73,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { auto cluster_pin_id = convert_to_cluster_pin_id(pin_id); auto atom_pins = clb_atom_pin_lookup.connected_atom_pins(cluster_pin_id); for (const AtomPinId atom_pin : atom_pins) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, atom_pin); + tatum::EdgeId tedge = atom_pin_to_timing_edge(*timing_info_->timing_graph(), atom_nlist, atom_lookup, atom_pin); if (!tedge) { continue; @@ -101,11 +101,11 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { /** Invalidates all timing edges associated with the clustered netlist 
connection * driving the specified pin. * Is concurrently safe. */ - void invalidate_connection(ParentPinId pin, TimingInfo* timing_info) { + void invalidate_connection(ParentPinId pin) { if (invalidated_pins_.count(pin)) return; //Already invalidated for (tatum::EdgeId edge : pin_timing_edges(pin)) { - timing_info->invalidate_delay(edge); + timing_info_->invalidate_delay(edge); } invalidated_pins_.insert(pin); @@ -146,6 +146,7 @@ class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { } private: + std::shared_ptr<TimingInfo> timing_info_; std::vector<int> pin_first_edge_; //Indices into timing_edges corresponding std::vector<tatum::EdgeId> timing_edges_; @@ -167,7 +168,7 @@ class NoopNetPinTimingInvalidator : public NetPinTimingInvalidator { return vtr::make_range((const tatum::EdgeId*)nullptr, (const tatum::EdgeId*)nullptr); } - void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) { + void invalidate_connection(ParentPinId /* pin */) { } void reset() { @@ -181,12 +182,13 @@ inline std::unique_ptr<NetPinTimingInvalidator> make_net_pin_timing_invalidator( const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, const AtomNetlist& atom_nlist, const AtomLookup& atom_lookup, - const tatum::TimingGraph& timing_graph, + const std::shared_ptr<TimingInfo>& timing_info, bool is_flat) { if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { return std::make_unique<NoopNetPinTimingInvalidator>(); } else { VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); - return std::make_unique<IncrNetPinTimingInvalidator>(net_list, clb_atom_pin_lookup, atom_nlist, atom_lookup, timing_graph, is_flat); + return std::make_unique<IncrNetPinTimingInvalidator>(net_list, clb_atom_pin_lookup, atom_nlist, + atom_lookup, timing_info, is_flat); } } \ No newline at end of file diff --git a/vpr/src/timing/concrete_timing_info.h b/vpr/src/timing/concrete_timing_info.h index 9aaae0d82ff..ce02e2abe90 100644 --- 
a/vpr/src/timing/concrete_timing_info.h +++ b/vpr/src/timing/concrete_timing_info.h @@ -454,7 +454,7 @@ class ConstantTimingInfo : public SetupHoldTimingInfo { /** Create a SetupTimingInfo for the given delay calculator */ template<class DelayCalc> std::unique_ptr<SetupTimingInfo> make_setup_timing_info(std::shared_ptr<DelayCalc> delay_calculator, e_timing_update_type update_type) { - auto& timing_ctx = g_vpr_ctx.timing(); + const auto& timing_ctx = g_vpr_ctx.timing(); std::shared_ptr<tatum::SetupTimingAnalyzer> analyzer; diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 14d3b08f939..fbd21cbf1bc 100644 --- a/vpr/src/timing/timing_info.h +++ b/vpr/src/timing/timing_info.h @@ -7,7 +7,7 @@ #include "tatum/timing_paths.hpp" #include "timing_util.h" -//Generic inteface which provides functionality to update (but not +//Generic interface which provides functionality to update (but not //access) timing information. // //This is useful for algorithms which know they need to update timing @@ -62,7 +62,7 @@ class SetupTimingInfo : public virtual TimingInfo { //Return the critical path with the least slack virtual tatum::TimingPathInfo least_slack_critical_path() const = 0; - //Return the critical path the the longest absolute delay + //Return the critical path the longest absolute delay virtual tatum::TimingPathInfo longest_critical_path() const = 0; //Return the set of critical paths between all clock domain pairs diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index a210c0dbdcd..e51fadf5d42 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -1,5 +1,6 @@ #include <fstream> #include <sstream> +#include <utility> #include "vtr_log.h" #include "vtr_assert.h" @@ -30,7 +31,7 @@ tatum::TimingPathInfo find_longest_critical_path_delay(const tatum::TimingConstr auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); - //Record the maximum critical path accross 
all domain pairs + //Record the maximum critical path across all domain pairs for (const auto& path_info : cpds) { if (crit_path_info.delay() < path_info.delay() || std::isnan(crit_path_info.delay())) { crit_path_info = path_info; @@ -47,7 +48,7 @@ tatum::TimingPathInfo find_least_slack_critical_path_delay(const tatum::TimingCo auto cpds = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); - //Record the maximum critical path accross all domain pairs + //Record the maximum critical path across all domain pairs for (const auto& path_info : cpds) { if (path_info.slack() < crit_path_info.slack() || std::isnan(crit_path_info.slack())) { crit_path_info = path_info; @@ -234,7 +235,7 @@ TimingStats::TimingStats(std::string pref, double cpd, double f_max, double swns fmax = f_max; setup_worst_neg_slack = swns; setup_total_neg_slack = stns; - prefix = pref; + prefix = std::move(pref); } void TimingStats::write(OutputFormat fmt, std::ostream& output) const { @@ -255,23 +256,23 @@ void TimingStats::write(OutputFormat fmt, std::ostream& output) const { } } -void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats) { - if (timing_summary_filename.size() > 0) { +void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats) { + if (!timing_summary_filename.empty()) { TimingStats::OutputFormat fmt; - if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".json")) { + if (vtr::check_file_name_extension(timing_summary_filename.data(), ".json")) { fmt = TimingStats::OutputFormat::JSON; - } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".xml")) { + } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".xml")) { fmt = TimingStats::OutputFormat::XML; - } else if (vtr::check_file_name_extension(timing_summary_filename.c_str(), ".txt")) { + } else if (vtr::check_file_name_extension(timing_summary_filename.data(), ".txt")) { fmt = 
TimingStats::OutputFormat::HumanReadable; } else { - VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.c_str()); + VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown extension on output %s", timing_summary_filename.data()); } std::fstream fp; - fp.open(timing_summary_filename, std::fstream::out | std::fstream::trunc); + fp.open(timing_summary_filename.data(), std::fstream::out | std::fstream::trunc); stats.write(fmt, fp); fp.close(); } @@ -279,8 +280,8 @@ void write_setup_timing_summary(std::string timing_summary_filename, const Timin void print_setup_timing_summary(const tatum::TimingConstraints& constraints, const tatum::SetupTimingAnalyzer& setup_analyzer, - std::string prefix, - std::string timing_summary_filename) { + std::string_view prefix, + std::string_view timing_summary_filename) { auto& timing_ctx = g_vpr_ctx.timing(); auto crit_paths = tatum::find_critical_paths(*timing_ctx.graph, constraints, setup_analyzer); @@ -292,12 +293,12 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, double setup_worst_neg_slack = sec_to_nanosec(find_setup_worst_negative_slack(setup_analyzer)); double setup_total_neg_slack = sec_to_nanosec(find_setup_total_negative_slack(setup_analyzer)); - const auto stats = TimingStats(prefix, least_slack_cpd_delay, fmax, + const auto stats = TimingStats(prefix.data(), least_slack_cpd_delay, fmax, setup_worst_neg_slack, setup_total_neg_slack); if (!timing_summary_filename.empty()) write_setup_timing_summary(timing_summary_filename, stats); - VTR_LOG("%scritical path delay (least slack): %g ns", prefix.c_str(), least_slack_cpd_delay); + VTR_LOG("%scritical path delay (least slack): %g ns", prefix.data(), least_slack_cpd_delay); if (crit_paths.size() == 1) { //Fmax is only meaningful for a single-clock circuit @@ -305,11 +306,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", 
prefix.c_str(), setup_worst_neg_slack); - VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.c_str(), setup_total_neg_slack); + VTR_LOG("%ssetup Worst Negative Slack (sWNS): %g ns\n", prefix.data(), setup_worst_neg_slack); + VTR_LOG("%ssetup Total Negative Slack (sTNS): %g ns\n", prefix.data(), setup_total_neg_slack); VTR_LOG("\n"); - VTR_LOG("%ssetup slack histogram:\n", prefix.c_str()); + VTR_LOG("%ssetup slack histogram:\n", prefix.data()); print_histogram(create_setup_slack_histogram(setup_analyzer)); if (crit_paths.size() > 1) { @@ -317,7 +318,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, VTR_LOG("\n"); //Periods per constraint - VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.c_str()); + VTR_LOG("%sintra-domain critical path delays (CPDs):\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() == path.capture_domain()) { VTR_LOG(" %s to %s CPD: %g ns (%g MHz)\n", @@ -329,7 +330,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.c_str()); + VTR_LOG("%sinter-domain critical path delays (CPDs):\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() != path.capture_domain()) { VTR_LOG(" %s to %s CPD: %g ns (%g MHz)\n", @@ -342,7 +343,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, VTR_LOG("\n"); //Slack per constraint - VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sintra-domain worst setup slacks per constraint:\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() == path.capture_domain()) { VTR_LOG(" %s to %s worst setup slack: %g ns\n", @@ -353,7 +354,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } VTR_LOG("\n"); - VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", 
prefix.c_str()); + VTR_LOG("%sinter-domain worst setup slacks per constraint:\n", prefix.data()); for (const auto& path : crit_paths) { if (path.launch_domain() != path.capture_domain()) { VTR_LOG(" %s to %s worst setup slack: %g ns\n", @@ -374,7 +375,7 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, if (path.launch_domain() == path.capture_domain() && !constraints.is_virtual_clock(path.launch_domain())) { if (path.delay() == 0.) { VTR_LOG_WARN("%s%s to %s CPD is %g, skipping in geomean and fanout-weighted CPDs\n", - prefix.c_str(), + prefix.data(), constraints.clock_domain_name(path.launch_domain()).c_str(), constraints.clock_domain_name(path.capture_domain()).c_str(), sec_to_nanosec(path.delay())); @@ -394,11 +395,11 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, //Print multi-clock geomeans double geomean_intra_domain_cpd = std::numeric_limits<double>::quiet_NaN(); - if (intra_domain_cpds.size() > 0) { + if (!intra_domain_cpds.empty()) { geomean_intra_domain_cpd = vtr::geomean(intra_domain_cpds.begin(), intra_domain_cpds.end()); } VTR_LOG("%sgeomean non-virtual intra-domain period: %g ns (%g MHz)\n", - prefix.c_str(), + prefix.data(), sec_to_nanosec(geomean_intra_domain_cpd), sec_to_mhz(geomean_intra_domain_cpd)); @@ -408,13 +409,13 @@ void print_setup_timing_summary(const tatum::TimingConstraints& constraints, } double fanout_weighted_geomean_intra_domain_cpd = std::numeric_limits<double>::quiet_NaN(); - if (fanout_weighted_intra_domain_cpds.size() > 0) { + if (!fanout_weighted_intra_domain_cpds.empty()) { fanout_weighted_geomean_intra_domain_cpd = vtr::geomean(fanout_weighted_intra_domain_cpds.begin(), fanout_weighted_intra_domain_cpds.end()); } VTR_LOG("%sfanout-weighted geomean non-virtual intra-domain period: %g ns (%g MHz)\n", - prefix.c_str(), + prefix.data(), sec_to_nanosec(fanout_weighted_geomean_intra_domain_cpd), sec_to_mhz(fanout_weighted_geomean_intra_domain_cpd)); @@ -605,20 +606,22 
@@ std::vector<HistogramBucket> create_hold_slack_histogram(const tatum::HoldTiming return histogram; } -void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix) { +void print_hold_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::HoldTimingAnalyzer& hold_analyzer, + std::string_view prefix) { auto& timing_ctx = g_vpr_ctx.timing(); auto hold_worst_neg_slack = sec_to_nanosec(find_hold_worst_negative_slack(hold_analyzer)); auto hold_total_neg_slack = sec_to_nanosec(find_hold_total_negative_slack(hold_analyzer)); - VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.c_str(), hold_worst_neg_slack); - VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.c_str(), hold_total_neg_slack); + VTR_LOG("%shold Worst Negative Slack (hWNS): %g ns\n", prefix.data(), hold_worst_neg_slack); + VTR_LOG("%shold Total Negative Slack (hTNS): %g ns\n", prefix.data(), hold_total_neg_slack); /*For testing*/ //VTR_LOG("Hold Total Negative Slack within clbs: %g ns\n", sec_to_nanosec(find_total_negative_slack_within_clb_blocks(hold_analyzer))); VTR_LOG("\n"); - VTR_LOG("%shold slack histogram:\n", prefix.c_str()); + VTR_LOG("%shold slack histogram:\n", prefix.data()); print_histogram(create_hold_slack_histogram(hold_analyzer)); if (constraints.clock_domains().size() > 1) { @@ -626,7 +629,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons VTR_LOG("\n"); //Slack per constraint - VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.c_str()); + VTR_LOG("%sintra-domain worst hold slacks per constraint:\n", prefix.data()); for (const auto& domain : constraints.clock_domains()) { float worst_slack = find_hold_worst_slack(hold_analyzer, domain, domain); @@ -639,7 +642,7 @@ void print_hold_timing_summary(const tatum::TimingConstraints& constraints, cons } VTR_LOG("\n"); - VTR_LOG("%sinter-domain worst hold slacks per 
constraint:\n", prefix.c_str()); + VTR_LOG("%sinter-domain worst hold slacks per constraint:\n", prefix.data()); for (const auto& launch_domain : constraints.clock_domains()) { for (const auto& capture_domain : constraints.clock_domains()) { if (launch_domain != capture_domain) { @@ -816,13 +819,13 @@ float calc_relaxed_criticality(const std::map<DomainPair, float>& domains_max_re return max_crit; } -void print_tatum_cpds(std::vector<tatum::TimingPathInfo> cpds) { +void print_tatum_cpds(const std::vector<tatum::TimingPathInfo>& cpds) { for (auto path : cpds) { VTR_LOG("Tatum %zu -> %zu: least_slack=%g cpd=%g\n", size_t(path.launch_domain()), size_t(path.capture_domain()), float(path.slack()), float(path.delay())); } } -tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) { +tatum::NodeId id_or_pin_name_to_tnode(const std::string& pin_name_or_tnode) { std::istringstream ss(pin_name_or_tnode); int id; if (ss >> id) { //Successfully converted @@ -837,7 +840,7 @@ tatum::NodeId id_or_pin_name_to_tnode(std::string pin_name_or_tnode) { return pin_name_to_tnode(pin_name_or_tnode); } -tatum::NodeId pin_name_to_tnode(std::string pin_name) { +tatum::NodeId pin_name_to_tnode(const std::string& pin_name) { auto& atom_ctx = g_vpr_ctx.atom(); AtomPinId pin = atom_ctx.nlist.find_pin(pin_name); @@ -855,7 +858,7 @@ tatum::NodeId pin_name_to_tnode(std::string pin_name) { return tnode; } -void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node) { +void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node) { auto& timing_graph = *timing_info.timing_graph(); auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator()); @@ -874,7 +877,7 @@ void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_ dot_writer.write_dot_file(filename, *timing_info.setup_analyzer()); } -void 
write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) { +void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node) { auto& timing_graph = *timing_info.timing_graph(); auto dot_writer = tatum::make_graphviz_dot_writer(timing_graph, *timing_info.delay_calculator()); diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index e4d45c84213..e0d011214ba 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -1,6 +1,7 @@ #ifndef VPR_TIMING_UTIL_H #define VPR_TIMING_UTIL_H #include <vector> +#include <string_view> #include "netlist_fwd.h" #include "tatum/timing_analyzers.hpp" @@ -49,7 +50,10 @@ std::vector<HistogramBucket> create_criticality_histogram(const Netlist<>& net_l size_t num_bins = 10); //Print a useful summary of timing information -void print_setup_timing_summary(const tatum::TimingConstraints& constraints, const tatum::SetupTimingAnalyzer& setup_analyzer, std::string prefix, std::string timing_summary_filename); +void print_setup_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::SetupTimingAnalyzer& setup_analyzer, + std::string_view prefix, + std::string_view timing_summary_filename); /* * Hold-time related statistics @@ -67,7 +71,9 @@ float find_hold_worst_slack(const tatum::HoldTimingAnalyzer& hold_analyzer, cons std::vector<HistogramBucket> create_hold_slack_histogram(const tatum::HoldTimingAnalyzer& hold_analyzer, size_t num_bins = 10); //Print a useful summary of timing information -void print_hold_timing_summary(const tatum::TimingConstraints& constraints, const tatum::HoldTimingAnalyzer& hold_analyzer, std::string prefix); +void print_hold_timing_summary(const tatum::TimingConstraints& constraints, + const tatum::HoldTimingAnalyzer& hold_analyzer, + std::string_view prefix); float find_total_negative_slack_within_clb_blocks(const tatum::HoldTimingAnalyzer& hold_analyzer); @@ 
-114,13 +120,13 @@ float calc_relaxed_criticality(const std::map<DomainPair, float>& domains_max_re /* * Debug */ -void print_tatum_cpds(std::vector<tatum::TimingPathInfo> cpds); +void print_tatum_cpds(const std::vector<tatum::TimingPathInfo>& cpds); -tatum::NodeId id_or_pin_name_to_tnode(std::string name_or_id); -tatum::NodeId pin_name_to_tnode(std::string name); +tatum::NodeId id_or_pin_name_to_tnode(const std::string& name_or_id); +tatum::NodeId pin_name_to_tnode(const std::string& name); -void write_setup_timing_graph_dot(std::string filename, SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); -void write_hold_timing_graph_dot(std::string filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); +void write_setup_timing_graph_dot(const std::string& filename, const SetupTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); +void write_hold_timing_graph_dot(const std::string& filename, HoldTimingInfo& timing_info, tatum::NodeId debug_node = tatum::NodeId::INVALID()); struct TimingStats { private: @@ -147,6 +153,6 @@ struct TimingStats { }; //Write a useful summary of timing information to JSON file -void write_setup_timing_summary(std::string timing_summary_filename, const TimingStats& stats); +void write_setup_timing_summary(std::string_view timing_summary_filename, const TimingStats& stats); #endif diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 6a4d97aefa1..8869cc55ddd 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -136,6 +136,7 @@ std::string rr_node_arch_name(RRNodeId inode, bool is_flat); //Class for looking up pb graph pins from block pin indices class IntraLbPbPinLookup { public: + IntraLbPbPinLookup() = default; IntraLbPbPinLookup(const std::vector<t_logical_block_type>& block_types); IntraLbPbPinLookup(const IntraLbPbPinLookup& rhs); IntraLbPbPinLookup& operator=(IntraLbPbPinLookup rhs);