diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 5dde5f352a6..7ff7205024a 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -5,6 +5,7 @@ #include #include +#include "prepack.h" #include "vpr_types.h" #include "vtr_ndmatrix.h" #include "vtr_optional.h" @@ -72,34 +73,17 @@ struct AtomContext : public Context { /******************************************************************** * Atom Netlist ********************************************************************/ - /** - * @brief constructor - * - * In the constructor initialize the list of pack molecules to nullptr and defines a custom deletor for it - */ - AtomContext() - : list_of_pack_molecules(nullptr, free_pack_molecules) {} - - ///@brief Atom netlist + /// @brief Atom netlist AtomNetlist nlist; - ///@brief Mappings to/from the Atom Netlist to physically described .blif models + /// @brief Mappings to/from the Atom Netlist to physically described .blif models AtomLookup lookup; - /** - * @brief The molecules associated with each atom block. - * - * This map is loaded in the pre-packing stage and freed at the very end of vpr flow run. - * The pointers in this multimap is shared with list_of_pack_molecules. - */ - std::multimap atom_molecules; - - /** - * @brief A linked list of all the packing molecules that are loaded in pre-packing stage. - * - * Is is useful in freeing the pack molecules at the destructor of the Atom context using free_pack_molecules. - */ - std::unique_ptr list_of_pack_molecules; + /// @brief Prepacker object which performs prepacking and stores the pack + /// molecules. Has a method to get the pack molecule of an AtomBlock. + /// TODO: This is mainly only used in the clusterer. It can probably be + /// removed from the AtomContext entirely. 
+ Prepacker prepacker; }; /** diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 64a3b7f58e4..8a802a3a234 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -452,15 +452,6 @@ void t_pb::set_atom_pin_bit_index(const t_pb_graph_pin* gpin, BitIndex atom_pin_ pin_rotations_[gpin] = atom_pin_bit_idx; } -void free_pack_molecules(t_pack_molecule* list_of_pack_molecules) { - t_pack_molecule* cur_pack_molecule = list_of_pack_molecules; - while (cur_pack_molecule != nullptr) { - cur_pack_molecule = list_of_pack_molecules->next; - delete list_of_pack_molecules; - list_of_pack_molecules = cur_pack_molecule; - } -} - /** * Free linked lists found in cluster_placement_stats_list */ diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index da91125c63d..e169a9e82a5 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1847,11 +1847,6 @@ typedef vtr::vector>> t_clb_op typedef std::vector> t_arch_switch_fanin; -/** - * @brief Free the linked list that saves all the packing molecules. - */ -void free_pack_molecules(t_pack_molecule* list_of_pack_molecules); - /** * @brief Free the linked lists to placement locations based on status of primitive inside placement stats data structure. */ diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 47b277f2872..a5ee38b8d0c 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -33,46 +33,28 @@ * The output of clustering is 400 t_pb of type BLE which represent the clustered user netlist. * Each of the 400 t_pb will reference one of the 4 BLE-type t_pb_graph_nodes. 
*/ +#include "cluster.h" + +#include #include #include #include #include #include -#include -#include - -#include "vtr_assert.h" -#include "vtr_log.h" -#include "vtr_math.h" -#include "vtr_memory.h" -#include "vpr_types.h" -#include "vpr_error.h" - -#include "globals.h" +#include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "pack_types.h" -#include "cluster.h" -#include "cluster_util.h" -#include "output_clustering.h" -#include "SetupGrid.h" -#include "read_xml_arch_file.h" -#include "vpr_utils.h" -#include "cluster_placement.h" -#include "echo_files.h" #include "cluster_router.h" -#include "lb_type_rr_graph.h" - -#include "timing_info.h" -#include "timing_reports.h" -#include "PreClusterDelayCalculator.h" -#include "PreClusterTimingGraphResolver.h" -#include "tatum/echo_writer.hpp" -#include "tatum/report/graphviz_dot_writer.hpp" -#include "tatum/TimingReporter.hpp" - -#include "re_cluster_util.h" +#include "cluster_util.h" #include "constraints_report.h" +#include "globals.h" +#include "pack_types.h" +#include "prepack.h" +#include "timing_info.h" +#include "vpr_types.h" +#include "vpr_utils.h" +#include "vtr_assert.h" +#include "vtr_log.h" /* * When attraction groups are created, the purpose is to pack more densely by adding more molecules @@ -87,10 +69,9 @@ static constexpr int ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES = 500; std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, - t_pack_molecule* molecule_head, + Prepacker& prepacker, const std::unordered_set& is_clock, const std::unordered_set& is_global, - const std::unordered_map& expected_lowest_cost_pb_gnode, bool allow_unrelated_clustering, bool balance_block_type_utilization, std::vector* lb_type_rr_graphs, @@ -173,11 +154,11 @@ std::map do_clustering(const t_packer_opts& pa helper_ctx.max_cluster_size = 0; max_pb_depth = 0; - const t_molecule_stats max_molecule_stats = calc_max_molecules_stats(molecule_head); + const 
t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_ctx.nlist); - mark_all_molecules_valid(molecule_head); + prepacker.mark_all_molecules_valid(); - cluster_stats.num_molecules = count_molecules(molecule_head); + cluster_stats.num_molecules = prepacker.get_num_molecules(); get_max_cluster_size_and_pb_depth(helper_ctx.max_cluster_size, max_pb_depth); @@ -193,7 +174,7 @@ std::map do_clustering(const t_packer_opts& pa check_for_duplicate_inputs (); #endif alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), molecule_head, + &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), prepacker, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -213,7 +194,7 @@ std::map do_clustering(const t_packer_opts& pa vtr::vector atom_criticality(atom_ctx.nlist.blocks().size(), 0.); if (packer_opts.timing_driven) { - calc_init_packing_timing(packer_opts, analysis_opts, expected_lowest_cost_pb_gnode, + calc_init_packing_timing(packer_opts, analysis_opts, prepacker, clustering_delay_calc, timing_info, atom_criticality); } diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index e08e58dac50..76b2315ceae 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -1,6 +1,6 @@ #ifndef CLUSTER_H #define CLUSTER_H -#include + #include #include #include @@ -11,13 +11,14 @@ #include "attraction_groups.h" #include "cluster_util.h" +class Prepacker; + std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, - t_pack_molecule* molecule_head, + Prepacker& prepacker, const std::unordered_set& is_clock, const std::unordered_set& is_global, - const std::unordered_map& expected_lowest_cost_pb_gnode, bool allow_unrelated_clustering, bool balance_block_type_utilization, std::vector* lb_type_rr_graphs, diff --git a/vpr/src/pack/cluster_util.cpp 
b/vpr/src/pack/cluster_util.cpp index 9d698672cf7..8fd0bcfa56f 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1,9 +1,17 @@ #include "cluster_util.h" +#include +#include "PreClusterTimingGraphResolver.h" +#include "PreClusterDelayCalculator.h" +#include "atom_netlist.h" #include "cluster_router.h" #include "cluster_placement.h" +#include "concrete_timing_info.h" #include "output_clustering.h" - +#include "prepack.h" +#include "tatum/TimingReporter.hpp" +#include "tatum/echo_writer.hpp" +#include "vpr_context.h" #include "vtr_math.h" #include "SetupGrid.h" @@ -175,7 +183,7 @@ void check_clustering() { //calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, - const std::unordered_map& expected_lowest_cost_pb_gnode, + const Prepacker& prepacker, std::shared_ptr& clustering_delay_calc, std::shared_ptr& timing_info, vtr::vector& atom_criticality) { @@ -184,7 +192,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, /* * Initialize the timing analyzer */ - clustering_delay_calc = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, packer_opts.inter_cluster_net_delay, expected_lowest_cost_pb_gnode); + clustering_delay_calc = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, packer_opts.inter_cluster_net_delay, prepacker); timing_info = make_setup_timing_info(clustering_delay_calc, packer_opts.timing_update_type); //Calculate the initial timing @@ -496,18 +504,14 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, t_cluster_placement_stats** cluster_placement_stats, t_pb_graph_node*** primitives_list, - t_pack_molecule* molecules_head, + const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, int& unclustered_list_head_size, int num_molecules) { /* Allocates the 
main data structures used for clustering and properly * * initializes them. */ - - t_molecule_link* next_ptr; - t_pack_molecule* cur_molecule; - t_pack_molecule** molecule_array; - int max_molecule_size; + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* alloc and load list of molecules to pack */ clustering_data.unclustered_list_head = new t_molecule_link[max_molecule_stats.num_used_ext_inputs + 1]; @@ -518,36 +522,32 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, clustering_data.unclustered_list_head[i].next = nullptr; } - molecule_array = new t_pack_molecule*[num_molecules]; - cur_molecule = molecules_head; - for (int i = 0; i < num_molecules; i++) { - VTR_ASSERT(cur_molecule != nullptr); - molecule_array[i] = cur_molecule; - cur_molecule = cur_molecule->next; - } - VTR_ASSERT(cur_molecule == nullptr); - qsort((void*)molecule_array, num_molecules, sizeof(t_pack_molecule*), - compare_molecule_gain); + // Create a sorted list of molecules, sorted on increasing molecule base gain. 
+ std::vector molecules_vector = prepacker.get_molecules_vector(); + VTR_ASSERT(molecules_vector.size() == (size_t)num_molecules); + std::stable_sort(molecules_vector.begin(), + molecules_vector.end(), + [](t_pack_molecule* a, t_pack_molecule* b) { + return a->base_gain < b->base_gain; + }); clustering_data.memory_pool = new t_molecule_link[num_molecules]; - next_ptr = clustering_data.memory_pool; + t_molecule_link* next_ptr = clustering_data.memory_pool; - for (int i = 0; i < num_molecules; i++) { + for (t_pack_molecule* mol : molecules_vector) { //Figure out how many external inputs are used by this molecule - t_molecule_stats molecule_stats = calc_molecule_stats(molecule_array[i]); + t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); int ext_inps = molecule_stats.num_used_ext_inputs; //Insert the molecule into the unclustered lists by number of external inputs - next_ptr->moleculeptr = molecule_array[i]; + next_ptr->moleculeptr = mol; next_ptr->next = clustering_data.unclustered_list_head[ext_inps].next; clustering_data.unclustered_list_head[ext_inps].next = next_ptr; next_ptr++; } - delete[] molecule_array; /* load net info */ - auto& atom_ctx = g_vpr_ctx.atom(); for (AtomNetId net : atom_ctx.nlist.nets()) { AtomPinId driver_pin = atom_ctx.nlist.net_driver(net); AtomBlockId driver_block = atom_ctx.nlist.pin_block(driver_pin); @@ -568,16 +568,9 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list * this array must be the size of the biggest molecule */ - max_molecule_size = 1; - cur_molecule = molecules_head; - while (cur_molecule != nullptr) { - if (cur_molecule->num_blocks > max_molecule_size) { - max_molecule_size = cur_molecule->num_blocks; - } - cur_molecule = cur_molecule->next; - } + size_t max_molecule_size = prepacker.get_max_molecule_size(); *primitives_list = 
new t_pb_graph_node*[max_molecule_size]; - for (int i = 0; i < max_molecule_size; i++) + for (size_t i = 0; i < max_molecule_size; i++) (*primitives_list)[i] = nullptr; } @@ -1119,12 +1112,8 @@ e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placeme for (int i = 0; i < molecule_size; i++) { if (molecule->atom_block_ids[i]) { /* invalidate all molecules that share atom block with current molecule */ - - auto rng = atom_ctx.atom_molecules.equal_range(molecule->atom_block_ids[i]); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* cur_molecule = kv.second; - cur_molecule->valid = false; - } + t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(molecule->atom_block_ids[i]); + cur_molecule->valid = false; commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); } @@ -2324,15 +2313,12 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); } } } @@ -2362,16 +2348,13 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, AtomBlockId blk_id = 
atom_ctx.nlist.pin_block(pin_id); if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); - count++; - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); + count++; } } } @@ -2451,15 +2434,12 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, //Only consider molecules that are unpacked and of the correct type if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - auto rng = atom_ctx.atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(atom_id); + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + 
add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); } } } @@ -2486,15 +2466,12 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, //Only consider molecules that are unpacked and of the correct type if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); } } } @@ -2604,33 +2581,17 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, return best_molecule; } -void mark_all_molecules_valid(t_pack_molecule* molecule_head) { - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - cur_molecule->valid = true; - } -} - -int count_molecules(t_pack_molecule* molecule_head) { - int num_molecules = 0; - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - ++num_molecules; - } - return num_molecules; -} - //Calculates molecule statistics for a single molecule -t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { +t_molecule_stats calc_molecule_stats(const t_pack_molecule* 
molecule, const AtomNetlist& atom_nlist) { t_molecule_stats molecule_stats; - auto& atom_ctx = g_vpr_ctx.atom(); - //Calculate the number of available pins on primitives within the molecule for (auto blk : molecule->atom_block_ids) { if (!blk) continue; ++molecule_stats.num_blocks; //Record number of valid blocks in molecule - const t_model* model = atom_ctx.nlist.block_model(blk); + const t_model* model = atom_nlist.block_model(blk); for (const t_model_ports* input_port = model->inputs; input_port != nullptr; input_port = input_port->next) { molecule_stats.num_input_pins += input_port->size; @@ -2647,12 +2608,12 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { for (auto blk : molecule->atom_block_ids) { if (!blk) continue; - for (auto pin : atom_ctx.nlist.block_pins(blk)) { - auto net = atom_ctx.nlist.pin_net(pin); + for (auto pin : atom_nlist.block_pins(blk)) { + auto net = atom_nlist.pin_net(pin); - auto pin_type = atom_ctx.nlist.pin_type(pin); + auto pin_type = atom_nlist.pin_type(pin); if (pin_type == PinType::SINK) { - auto driver_blk = atom_ctx.nlist.net_driver_block(net); + auto driver_blk = atom_nlist.net_driver_block(net); if (molecule_atoms.count(driver_blk)) { //Pin driven by a block within the molecule @@ -2666,8 +2627,8 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { VTR_ASSERT(pin_type == PinType::DRIVER); bool net_leaves_molecule = false; - for (auto sink_pin : atom_ctx.nlist.net_sinks(net)) { - auto sink_blk = atom_ctx.nlist.pin_block(sink_pin); + for (auto sink_pin : atom_nlist.net_sinks(net)) { + auto sink_blk = atom_nlist.pin_block(sink_pin); if (!molecule_atoms.count(sink_blk)) { //There is at least one sink outside of the current molecule @@ -2689,33 +2650,10 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { return molecule_stats; } -//Calculates maximum molecule statistics accross all molecules in linked list -t_molecule_stats calc_max_molecules_stats(const 
t_pack_molecule* molecule_head) { - t_molecule_stats max_molecules_stats; - - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - //Calculate per-molecule statistics - t_molecule_stats cur_molecule_stats = calc_molecule_stats(cur_molecule); - - //Record the maximums (member-wise) over all molecules - max_molecules_stats.num_blocks = std::max(max_molecules_stats.num_blocks, cur_molecule_stats.num_blocks); - - max_molecules_stats.num_pins = std::max(max_molecules_stats.num_pins, cur_molecule_stats.num_pins); - max_molecules_stats.num_input_pins = std::max(max_molecules_stats.num_input_pins, cur_molecule_stats.num_input_pins); - max_molecules_stats.num_output_pins = std::max(max_molecules_stats.num_output_pins, cur_molecule_stats.num_output_pins); - - max_molecules_stats.num_used_ext_pins = std::max(max_molecules_stats.num_used_ext_pins, cur_molecule_stats.num_used_ext_pins); - max_molecules_stats.num_used_ext_inputs = std::max(max_molecules_stats.num_used_ext_inputs, cur_molecule_stats.num_used_ext_inputs); - max_molecules_stats.num_used_ext_outputs = std::max(max_molecules_stats.num_used_ext_outputs, cur_molecule_stats.num_used_ext_outputs); - } - - return max_molecules_stats; -} - std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, const vtr::vector& atom_criticality) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); //Put all atoms in seed list std::vector seed_atoms(atom_ctx.nlist.blocks().begin(), atom_ctx.nlist.blocks().end()); @@ -2732,18 +2670,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_INPUTS) { //By number of used molecule input pins for (auto blk : atom_ctx.nlist.blocks()) { - int max_molecule_inputs = 0; - auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, 
molecule_rng.second)) { - const t_pack_molecule* blk_mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); - - //Keep the max over all molecules associated with the atom - max_molecule_inputs = std::max(max_molecule_inputs, molecule_stats.num_used_ext_inputs); - } - - atom_gains[blk] = max_molecule_inputs; + const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + atom_gains[blk] = molecule_stats.num_used_ext_inputs; } } else if (seed_type == e_cluster_seed::BLEND) { @@ -2752,96 +2681,72 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, /* Score seed gain of each block as a weighted sum of timing criticality, * number of tightly coupled blocks connected to it, and number of external inputs */ float seed_blend_fac = 0.5; - float max_blend_gain = 0; - - auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* blk_mol = kv.second; - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); + const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); - VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); - - float blend_gain = (seed_blend_fac * atom_criticality[blk] - + (1 - seed_blend_fac) * (molecule_stats.num_used_ext_inputs / max_molecule_stats.num_used_ext_inputs)); - blend_gain *= (1 + 0.2 * (molecule_stats.num_blocks - 1)); - - //Keep the max over all molecules associated with the atom - max_blend_gain = std::max(max_blend_gain, blend_gain); - } - atom_gains[blk] = max_blend_gain; + float blend_gain = (seed_blend_fac * atom_criticality[blk] + + (1 - seed_blend_fac) * (molecule_stats.num_used_ext_inputs / 
max_molecule_stats.num_used_ext_inputs)); + blend_gain *= (1 + 0.2 * (molecule_stats.num_blocks - 1)); + atom_gains[blk] = blend_gain; } } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { //By pins per molecule (i.e. available pins on primitives, not pins in use) for (auto blk : atom_ctx.nlist.blocks()) { - int max_molecule_pins = 0; - auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(mol); - - //Keep the max over all molecules associated with the atom - int molecule_pins = 0; - if (seed_type == e_cluster_seed::MAX_PINS) { - //All pins - molecule_pins = molecule_stats.num_pins; - } else { - VTR_ASSERT(seed_type == e_cluster_seed::MAX_INPUT_PINS); - //Input pins only - molecule_pins = molecule_stats.num_input_pins; - } + const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); - //Keep the max over all molecules associated with the atom - max_molecule_pins = std::max(max_molecule_pins, molecule_pins); + int molecule_pins = 0; + if (seed_type == e_cluster_seed::MAX_PINS) { + //All pins + molecule_pins = molecule_stats.num_pins; + } else { + VTR_ASSERT(seed_type == e_cluster_seed::MAX_INPUT_PINS); + //Input pins only + molecule_pins = molecule_stats.num_input_pins; } - atom_gains[blk] = max_molecule_pins; + + atom_gains[blk] = molecule_pins; } } else if (seed_type == e_cluster_seed::BLEND2) { for (auto blk : atom_ctx.nlist.blocks()) { - float max_gain = 0; - auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* mol = kv.second; + const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); + const 
t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol); + float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); + float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); + float output_pin_ratio = vtr::safe_ratio(molecule_stats.num_output_pins, max_molecule_stats.num_output_pins); + float used_ext_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_pins, max_molecule_stats.num_used_ext_pins); + float used_ext_input_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_inputs, max_molecule_stats.num_used_ext_inputs); + float used_ext_output_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_outputs, max_molecule_stats.num_used_ext_outputs); + float num_blocks_ratio = vtr::safe_ratio(molecule_stats.num_blocks, max_molecule_stats.num_blocks); + float criticality = atom_criticality[blk]; - float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); - float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); - float output_pin_ratio = vtr::safe_ratio(molecule_stats.num_output_pins, max_molecule_stats.num_output_pins); - float used_ext_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_pins, max_molecule_stats.num_used_ext_pins); - float used_ext_input_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_inputs, max_molecule_stats.num_used_ext_inputs); - float used_ext_output_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_outputs, max_molecule_stats.num_used_ext_outputs); - float num_blocks_ratio = vtr::safe_ratio(molecule_stats.num_blocks, max_molecule_stats.num_blocks); - float criticality = atom_criticality[blk]; + constexpr float PIN_WEIGHT = 0.; + constexpr float INPUT_PIN_WEIGHT = 0.5; + constexpr float OUTPUT_PIN_WEIGHT = 0.; + constexpr float USED_PIN_WEIGHT = 0.; + constexpr float 
USED_INPUT_PIN_WEIGHT = 0.2; + constexpr float USED_OUTPUT_PIN_WEIGHT = 0.; + constexpr float BLOCKS_WEIGHT = 0.2; + constexpr float CRITICALITY_WEIGHT = 0.1; - constexpr float PIN_WEIGHT = 0.; - constexpr float INPUT_PIN_WEIGHT = 0.5; - constexpr float OUTPUT_PIN_WEIGHT = 0.; - constexpr float USED_PIN_WEIGHT = 0.; - constexpr float USED_INPUT_PIN_WEIGHT = 0.2; - constexpr float USED_OUTPUT_PIN_WEIGHT = 0.; - constexpr float BLOCKS_WEIGHT = 0.2; - constexpr float CRITICALITY_WEIGHT = 0.1; + float gain = PIN_WEIGHT * pin_ratio + + INPUT_PIN_WEIGHT * input_pin_ratio + + OUTPUT_PIN_WEIGHT * output_pin_ratio - float gain = PIN_WEIGHT * pin_ratio - + INPUT_PIN_WEIGHT * input_pin_ratio - + OUTPUT_PIN_WEIGHT * output_pin_ratio + + USED_PIN_WEIGHT * used_ext_pin_ratio + + USED_INPUT_PIN_WEIGHT * used_ext_input_pin_ratio + + USED_OUTPUT_PIN_WEIGHT * used_ext_output_pin_ratio - + USED_PIN_WEIGHT * used_ext_pin_ratio - + USED_INPUT_PIN_WEIGHT * used_ext_input_pin_ratio - + USED_OUTPUT_PIN_WEIGHT * used_ext_output_pin_ratio + + BLOCKS_WEIGHT * num_blocks_ratio + + CRITICALITY_WEIGHT * criticality; - + BLOCKS_WEIGHT * num_blocks_ratio - + CRITICALITY_WEIGHT * criticality; - - max_gain = std::max(max_gain, gain); - } - - atom_gains[blk] = max_gain; + atom_gains[blk] = gain; } } else { @@ -2878,15 +2783,10 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vect if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { t_pack_molecule* best = nullptr; - // Iterate over all the molecules associated with the selected atom - // and select the one with the highest gain - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { - best = molecule; - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (molecule->valid) { 
+ if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { + best = molecule; } } VTR_ASSERT(best != nullptr); @@ -2961,24 +2861,6 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& return gain; } -int compare_molecule_gain(const void* a, const void* b) { - float base_gain_a, base_gain_b, diff; - const t_pack_molecule *molecule_a, *molecule_b; - molecule_a = (*(const t_pack_molecule* const*)a); - molecule_b = (*(const t_pack_molecule* const*)b); - - base_gain_a = molecule_a->base_gain; - base_gain_b = molecule_b->base_gain; - diff = base_gain_a - base_gain_b; - if (diff > 0) { - return 1; - } - if (diff < 0) { - return -1; - } - return 0; -} - /* Determine if speculatively packed cur_pb is pin feasible * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. @@ -3368,12 +3250,9 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, } else { pb_stats->gain[blk_id] += 0.001; } - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); - } + t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (molecule->valid) { + transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); } } } diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 31a829c82d0..4f190645ff0 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -1,18 +1,16 @@ #ifndef CLUSTER_UTIL_H #define CLUSTER_UTIL_H -#include "globals.h" -#include "atom_netlist.h" +#include #include "pack_types.h" -#include "echo_files.h" -#include "vpr_utils.h" -#include "constraints_report.h" +#include "vtr_vector.h" -#include "concrete_timing_info.h" 
-#include "PreClusterDelayCalculator.h" -#include "PreClusterTimingGraphResolver.h" -#include "tatum/echo_writer.hpp" -#include "tatum/TimingReporter.hpp" +class AtomNetId; +class ClusterBlockId; +class PreClusterDelayCalculator; +class Prepacker; +class SetupTimingInfo; +class t_pack_molecule; /** * @file @@ -113,7 +111,7 @@ void check_clustering(); //calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, - const std::unordered_map& expected_lowest_cost_pb_gnode, + const Prepacker& prepacker, std::shared_ptr& clustering_delay_calc, std::shared_ptr& timing_info, vtr::vector& atom_criticality); @@ -150,7 +148,7 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, t_cluster_placement_stats** cluster_placement_stats, t_pb_graph_node*** primitives_list, - t_pack_molecule* molecules_head, + const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, int& unclustered_list_head_size, @@ -437,13 +435,7 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types); -void mark_all_molecules_valid(t_pack_molecule* molecule_head); - -int count_molecules(t_pack_molecule* molecule_head); - -t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule); - -t_molecule_stats calc_max_molecules_stats(const t_pack_molecule* molecule_head); +t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist); std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, @@ -453,7 +445,6 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vect float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId 
cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); -int compare_molecule_gain(const void* a, const void* b); int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a76c9e698d4..e8c68ea2244 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -1,22 +1,18 @@ #include -#include -#include +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_math.h" #include "vpr_error.h" #include "vpr_types.h" -#include "read_xml_arch_file.h" #include "globals.h" #include "prepack.h" #include "pack_types.h" #include "pack.h" #include "cluster.h" #include "SetupGrid.h" -#include "re_cluster.h" #include "noc_aware_cluster_util.h" /* #define DUMP_PB_GRAPH 1 */ @@ -42,14 +38,13 @@ bool try_pack(t_packer_opts* packer_opts, const t_model* library_models, float interc_delay, std::vector* lb_type_rr_graphs) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); + AtomContext& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + ClusteringHelperContext& helper_ctx = g_vpr_ctx.mutable_cl_helper(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); std::unordered_set is_clock, is_global; - std::unordered_map expected_lowest_cost_pb_gnode; //The molecules associated with each atom block t_clustering_data clustering_data; - std::vector list_of_packing_patterns; VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); /* determine number of models in the architecture */ @@ -75,21 +70,9 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("\ttotal blocks: %zu, total nets: %zu, total inputs: %zu, total 
outputs: %zu\n", atom_ctx.nlist.blocks().size(), atom_ctx.nlist.nets().size(), num_p_inputs, num_p_outputs); + // Run the prepacker, packing the atoms into molecules. VTR_LOG("Begin prepacking.\n"); - list_of_packing_patterns = alloc_and_load_pack_patterns(); - - //To ensure the list of packing patterns gets freed in case of an error, we create - //a unique_ptr with custom deleter which will free the list at the end of the current - //scope. - auto list_of_packing_patterns_deleter = [](std::vector* ptr) { - free_list_of_pack_patterns(*ptr); - }; - std::unique_ptr, decltype(list_of_packing_patterns_deleter)> list_of_packing_patterns_cleanup_guard(&list_of_packing_patterns, - list_of_packing_patterns_deleter); - - atom_mutable_ctx.list_of_pack_molecules.reset(alloc_and_load_pack_molecules(list_of_packing_patterns.data(), - expected_lowest_cost_pb_gnode, - list_of_packing_patterns.size())); + atom_mutable_ctx.prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); /* We keep attraction groups off in the first iteration, and * only turn on in later iterations if some floorplan regions turn out to be overfull. @@ -137,10 +120,10 @@ bool try_pack(t_packer_opts* packer_opts, helper_ctx.num_used_type_instances = do_clustering( *packer_opts, *analysis_opts, - arch, atom_mutable_ctx.list_of_pack_molecules.get(), + arch, + atom_mutable_ctx.prepacker, is_clock, is_global, - expected_lowest_cost_pb_gnode, allow_unrelated_clustering, balance_block_type_util, lb_type_rr_graphs, @@ -304,7 +287,6 @@ std::unordered_set alloc_and_load_is_clock() { * the corresponding entry by adding the clock to is_clock. * only for an error check. */ - int num_clocks = 0; std::unordered_set is_clock; /* Want to identify all the clock nets. 
*/ @@ -315,7 +297,6 @@ std::unordered_set alloc_and_load_is_clock() { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (!is_clock.count(net_id)) { is_clock.insert(net_id); - num_clocks++; } } } @@ -385,4 +366,4 @@ static int count_models(const t_model* user_models) { } return n_models; -} \ No newline at end of file +} diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h index 9f811b78f52..0115d2c859a 100644 --- a/vpr/src/pack/pack.h +++ b/vpr/src/pack/pack.h @@ -1,9 +1,11 @@ #ifndef PACK_H #define PACK_H + #include #include #include "vpr_types.h" -#include "atom_netlist_fwd.h" + +class AtomNetId; bool try_pack(t_packer_opts* packer_opts, const t_analysis_opts* analysis_opts, diff --git a/vpr/src/pack/prepack.cpp b/vpr/src/pack/prepack.cpp index 3307472ac79..a5928ba85f7 100644 --- a/vpr/src/pack/prepack.cpp +++ b/vpr/src/pack/prepack.cpp @@ -10,30 +10,43 @@ * March 12, 2012 */ +#include "prepack.h" + #include #include #include #include #include +#include -#include "vtr_util.h" -#include "vtr_assert.h" -#include "vtr_memory.h" - -#include "vpr_types.h" -#include "vpr_error.h" - -#include "read_xml_arch_file.h" -#include "globals.h" #include "atom_netlist.h" -#include "prepack.h" -#include "vpr_utils.h" +#include "cluster_util.h" #include "echo_files.h" -#include "attraction_groups.h" +#include "physical_types.h" +#include "vpr_error.h" +#include "vpr_types.h" +#include "vpr_utils.h" +#include "vtr_assert.h" +#include "vtr_range.h" +#include "vtr_util.h" +#include "vtr_vector.h" /*****************************************/ /*Local Function Declaration */ /*****************************************/ +static std::vector alloc_and_load_pack_patterns(const std::vector& logical_block_types); + +static void free_list_of_pack_patterns(std::vector& list_of_pack_patterns); + +static void free_pack_pattern(t_pack_patterns* pack_pattern); + +static t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, + vtr::vector& 
expected_lowest_cost_pb_gnode, + const int num_packing_patterns, + std::multimap& atom_molecules, + const AtomNetlist& atom_nlist, + const std::vector& logical_block_types); + static void discover_pattern_names_in_pb_graph_node(t_pb_graph_node* pb_graph_node, std::unordered_map& pattern_names); @@ -65,21 +78,30 @@ static void free_pack_pattern_block(t_pack_pattern_block* pattern_block, t_pack_ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patterns, const int pack_pattern_index, - AtomBlockId blk_id); + AtomBlockId blk_id, + std::multimap& atom_molecules, + const AtomNetlist& atom_nlist); static bool try_expand_molecule(t_pack_molecule* molecule, - const AtomBlockId blk_id); + const AtomBlockId blk_id, + const std::multimap& atom_molecules, + const AtomNetlist& atom_nlist); static void print_pack_molecules(const char* fname, const t_pack_patterns* list_of_pack_patterns, const int num_pack_patterns, - const t_pack_molecule* list_of_molecules); + const t_pack_molecule* list_of_molecules, + const AtomNetlist& atom_nlist); -static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block(const AtomBlockId blk_id); +static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block(const AtomBlockId blk_id, + const std::vector& logical_block_types); static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block_in_pb_graph_node(const AtomBlockId blk_id, t_pb_graph_node* curr_pb_graph_node, float* cost); -static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern); +static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, + const t_pack_patterns* list_of_pack_patterns, + const std::multimap& atom_molecules, + const AtomNetlist& atom_nlist); static std::vector find_end_of_path(t_pb_graph_pin* input_pin, int pattern_index); @@ -90,15 +112,22 @@ static void find_all_equivalent_chains(t_pack_patterns* chain_pattern, const t_p static void 
update_chain_root_pins(t_pack_patterns* chain_pattern, const std::vector& chain_input_pins); -static t_pb_graph_pin* get_connected_primitive_pin(const t_pb_graph_pin* input_pin, const int pack_pattern); - static void get_all_connected_primitive_pins(const t_pb_graph_pin* cluster_input_pin, std::vector& connected_primitive_pins); -static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule); +static void init_molecule_chain_info(const AtomBlockId blk_id, + t_pack_molecule* molecule, + const std::multimap& atom_molecules, + const AtomNetlist& atom_nlist); -static AtomBlockId get_sink_block(const AtomBlockId block_id, const t_model_ports* model_port, const BitIndex pin_number); +static AtomBlockId get_sink_block(const AtomBlockId block_id, + const t_model_ports* model_port, + const BitIndex pin_number, + const AtomNetlist& atom_nlist); -static AtomBlockId get_driving_block(const AtomBlockId block_id, const t_model_ports* model_port, const BitIndex pin_number); +static AtomBlockId get_driving_block(const AtomBlockId block_id, + const t_model_ports* model_port, + const BitIndex pin_number, + const AtomNetlist& atom_nlist); static void print_chain_starting_points(t_pack_patterns* chain_pattern); @@ -116,15 +145,14 @@ static void print_chain_starting_points(t_pack_patterns* chain_pattern); * (general packing) or upstream (in tech mapping). * If this limitation is too constraining, code is designed so that this limitation can be removed. 
*/ -std::vector alloc_and_load_pack_patterns() { +static std::vector alloc_and_load_pack_patterns(const std::vector& logical_block_types) { int L_num_blocks; std::vector list_of_packing_patterns; t_pb_graph_edge* expansion_edge; - auto& device_ctx = g_vpr_ctx.device(); /* alloc and initialize array of packing patterns based on architecture complex blocks */ std::unordered_map pattern_names; - for (auto& type : device_ctx.logical_block_types) { + for (const t_logical_block_type& type : logical_block_types) { discover_pattern_names_in_pb_graph_node(type.pb_graph_head, pattern_names); } @@ -132,7 +160,7 @@ std::vector alloc_and_load_pack_patterns() { /* load packing patterns by traversing the edges to find edges belonging to pattern */ for (size_t i = 0; i < pattern_names.size(); i++) { - for (auto& type : device_ctx.logical_block_types) { + for (const t_logical_block_type& type : logical_block_types) { // find an edge that belongs to this pattern expansion_edge = find_expansion_edge_of_pattern(i, type.pb_graph_head); if (!expansion_edge) { @@ -343,13 +371,13 @@ static std::vector alloc_and_init_pattern_list_from_hash(std::u return nlist; } -void free_list_of_pack_patterns(std::vector& list_of_pack_patterns) { +static void free_list_of_pack_patterns(std::vector& list_of_pack_patterns) { for (size_t i = 0; i < list_of_pack_patterns.size(); i++) { free_pack_pattern(&list_of_pack_patterns[i]); } } -void free_pack_pattern(t_pack_patterns* pack_pattern) { +static void free_pack_pattern(t_pack_patterns* pack_pattern) { if (pack_pattern) { int num_pack_pattern_blocks = pack_pattern->num_blocks; t_pack_pattern_block** pattern_block_list = new t_pack_pattern_block*[num_pack_pattern_blocks]; @@ -772,15 +800,16 @@ static void backward_expand_pack_pattern_from_edge(const t_pb_graph_edge* expans * 3. Chained molecules are molecules that follow a carry-chain style pattern, * ie. 
a single linear chain that can be split across multiple complex blocks */ -t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, - std::unordered_map& expected_lowest_cost_pb_gnode, - const int num_packing_patterns) { +static t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, + vtr::vector& expected_lowest_cost_pb_gnode, + const int num_packing_patterns, + std::multimap& atom_molecules, + const AtomNetlist& atom_nlist, + const std::vector& logical_block_types) { int i, j, best_pattern; t_pack_molecule* list_of_molecules_head; t_pack_molecule* cur_molecule; bool* is_used; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); is_used = new bool[num_packing_patterns]; for (i = 0; i < num_packing_patterns; i++) @@ -813,11 +842,11 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat VTR_ASSERT(is_used[best_pattern] == false); is_used[best_pattern] = true; - auto blocks = atom_ctx.nlist.blocks(); + auto blocks = atom_nlist.blocks(); for (auto blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter) { auto blk_id = *blk_iter; - cur_molecule = try_create_molecule(list_of_pack_patterns, best_pattern, blk_id); + cur_molecule = try_create_molecule(list_of_pack_patterns, best_pattern, blk_id, atom_molecules, atom_nlist); if (cur_molecule != nullptr) { cur_molecule->next = list_of_molecules_head; /* In the event of multiple molecules with the same atom block pattern, @@ -829,7 +858,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat //Note: atom_molecules is an (ordered) multimap so the last molecule // inserted for a given blk_id will be the last valid element // in the equal_range - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); //The range of molecules matching this block + auto rng = atom_molecules.equal_range(blk_id); //The range of molecules matching this block bool range_empty = 
(rng.first == rng.second); bool cur_was_last_inserted = false; if (!range_empty) { @@ -852,8 +881,8 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat * If a block belongs to a molecule, then carrying the single atoms around can make the packing problem * more difficult because now it needs to consider splitting molecules. */ - for (auto blk_id : atom_ctx.nlist.blocks()) { - t_pb_graph_node* best = get_expected_lowest_cost_primitive_for_atom_block(blk_id); + for (auto blk_id : atom_nlist.blocks()) { + t_pb_graph_node* best = get_expected_lowest_cost_primitive_for_atom_block(blk_id, logical_block_types); if (!best) { /* Free the molecules in the linked list to avoid memory leakage */ cur_molecule = list_of_molecules_head; @@ -864,14 +893,14 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat } VPR_FATAL_ERROR(VPR_ERROR_PACK, "Failed to find any location to pack primitive of type '%s' in architecture", - atom_ctx.nlist.block_model(blk_id)->name); + atom_nlist.block_model(blk_id)->name); } VTR_ASSERT_SAFE(nullptr != best); expected_lowest_cost_pb_gnode[blk_id] = best; - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + auto rng = atom_molecules.equal_range(blk_id); bool rng_empty = (rng.first == rng.second); if (rng_empty) { cur_molecule = new t_pack_molecule; @@ -887,14 +916,15 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat cur_molecule->base_gain = 1; list_of_molecules_head = cur_molecule; - atom_mutable_ctx.atom_molecules.insert({blk_id, cur_molecule}); + atom_molecules.insert({blk_id, cur_molecule}); } } if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_PRE_PACKING_MOLECULES_AND_PATTERNS)) { print_pack_molecules(getEchoFileName(E_ECHO_PRE_PACKING_MOLECULES_AND_PATTERNS), list_of_pack_patterns, num_packing_patterns, - list_of_molecules_head); + list_of_molecules_head, + atom_nlist); } return list_of_molecules_head; @@ -933,11 +963,11 @@ static void 
free_pack_pattern_block(t_pack_pattern_block* pattern_block, t_pack_ */ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patterns, const int pack_pattern_index, - AtomBlockId blk_id) { + AtomBlockId blk_id, + std::multimap& atom_molecules, + const AtomNetlist& atom_nlist) { t_pack_molecule* molecule; - auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); - auto pack_pattern = &list_of_pack_patterns[pack_pattern_index]; // Check pack pattern validity @@ -948,7 +978,7 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter // If a chain pattern extends beyond a single logic block, we must find // the furthest blk_id up the chain that is not mapped to a molecule yet. if (pack_pattern->is_chain) { - blk_id = find_new_root_atom_for_chain(blk_id, pack_pattern); + blk_id = find_new_root_atom_for_chain(blk_id, pack_pattern, atom_molecules, atom_nlist); if (!blk_id) return nullptr; } @@ -960,12 +990,12 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter molecule->num_blocks = pack_pattern->num_blocks; molecule->root = pack_pattern->root_block->block_id; - if (try_expand_molecule(molecule, blk_id)) { + if (try_expand_molecule(molecule, blk_id, atom_molecules, atom_nlist)) { // Success! 
commit molecule // update chain info for chain molecules if (molecule->pack_pattern->is_chain) { - init_molecule_chain_info(blk_id, molecule); + init_molecule_chain_info(blk_id, molecule, atom_molecules, atom_nlist); } // update the atom_molcules with the atoms that are mapped to this molecule @@ -976,7 +1006,7 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter continue; } - atom_mutable_ctx.atom_molecules.insert({blk_id2, molecule}); + atom_molecules.insert({blk_id2, molecule}); } } else { // Failed to create molecule @@ -1001,9 +1031,9 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter * blk_id : chosen to be the root of this molecule and the code is expanding from */ static bool try_expand_molecule(t_pack_molecule* molecule, - const AtomBlockId blk_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - + const AtomBlockId blk_id, + const std::multimap& atom_molecules, + const AtomNetlist& atom_nlist) { // root block of the pack pattern, which is the starting point of this pattern const auto pattern_root_block = molecule->pack_pattern->root_block; // bool array indicating whether a position in a pack pattern is optional or should @@ -1035,7 +1065,7 @@ static bool try_expand_molecule(t_pack_molecule* molecule, continue; } - if (!block_id || !primitive_type_feasible(block_id, pattern_block->pb_type) || (molecule_atom_block_id && molecule_atom_block_id != block_id) || atom_ctx.atom_molecules.find(block_id) != atom_ctx.atom_molecules.end()) { + if (!block_id || !primitive_type_feasible(block_id, pattern_block->pb_type) || (molecule_atom_block_id && molecule_atom_block_id != block_id) || atom_molecules.find(block_id) != atom_molecules.end()) { // Stopping conditions, if: // 1) this is an invalid atom block (nothing) // 2) this atom block cannot fit in this primitive type @@ -1062,7 +1092,7 @@ static bool try_expand_molecule(t_pack_molecule* molecule, // find the block this connection is driving and add it to the 
queue auto port_model = block_connection->from_pin->port->model_port; auto ipin = block_connection->from_pin->pin_number; - auto sink_blk_id = get_sink_block(block_id, port_model, ipin); + auto sink_blk_id = get_sink_block(block_id, port_model, ipin, atom_nlist); // add this sink block id with its corresponding pattern block to the queue pattern_block_queue.push(std::make_pair(block_connection->to_block, sink_blk_id)); // this block is being driven by this connection @@ -1070,7 +1100,7 @@ static bool try_expand_molecule(t_pack_molecule* molecule, // find the block that is driving this connection and it to the queue auto port_model = block_connection->to_pin->port->model_port; auto ipin = block_connection->to_pin->pin_number; - auto driver_blk_id = get_driving_block(block_id, port_model, ipin); + auto driver_blk_id = get_driving_block(block_id, port_model, ipin, atom_nlist); // add this driver block id with its corresponding pattern block to the queue pattern_block_queue.push(std::make_pair(block_connection->from_block, driver_blk_id)); } @@ -1094,17 +1124,18 @@ static bool try_expand_molecule(t_pack_molecule* molecule, * model_port : the model of the port driving the net * pin_number : the pin_number of the pin driving the net (pin index within the port) */ -static AtomBlockId get_sink_block(const AtomBlockId block_id, const t_model_ports* model_port, const BitIndex pin_number) { - auto& atom_ctx = g_vpr_ctx.atom(); - - auto port_id = atom_ctx.nlist.find_atom_port(block_id, model_port); +static AtomBlockId get_sink_block(const AtomBlockId block_id, + const t_model_ports* model_port, + const BitIndex pin_number, + const AtomNetlist& atom_nlist) { + auto port_id = atom_nlist.find_atom_port(block_id, model_port); if (port_id) { - auto net_id = atom_ctx.nlist.port_net(port_id, pin_number); - if (net_id && atom_ctx.nlist.net_sinks(net_id).size() == 1) { /* Single fanout assumption */ - auto net_sinks = atom_ctx.nlist.net_sinks(net_id); + auto net_id = 
atom_nlist.port_net(port_id, pin_number); + if (net_id && atom_nlist.net_sinks(net_id).size() == 1) { /* Single fanout assumption */ + auto net_sinks = atom_nlist.net_sinks(net_id); auto sink_pin_id = *(net_sinks.begin()); - return atom_ctx.nlist.pin_block(sink_pin_id); + return atom_nlist.pin_block(sink_pin_id); } } @@ -1119,19 +1150,20 @@ static AtomBlockId get_sink_block(const AtomBlockId block_id, const t_model_port * model_port : the model of the port driven by the net * pin_number : the pin_number of the pin driven by the net (pin index within the port) */ -static AtomBlockId get_driving_block(const AtomBlockId block_id, const t_model_ports* model_port, const BitIndex pin_number) { - auto& atom_ctx = g_vpr_ctx.atom(); - - auto port_id = atom_ctx.nlist.find_atom_port(block_id, model_port); +static AtomBlockId get_driving_block(const AtomBlockId block_id, + const t_model_ports* model_port, + const BitIndex pin_number, + const AtomNetlist& atom_nlist) { + auto port_id = atom_nlist.find_atom_port(block_id, model_port); if (port_id) { - auto net_id = atom_ctx.nlist.port_net(port_id, pin_number); - if (net_id && atom_ctx.nlist.net_sinks(net_id).size() == 1) { /* Single fanout assumption */ + auto net_id = atom_nlist.port_net(port_id, pin_number); + if (net_id && atom_nlist.net_sinks(net_id).size() == 1) { /* Single fanout assumption */ - auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); + auto driver_blk_id = atom_nlist.net_driver_block(net_id); if (model_port->is_clock) { - auto driver_blk_type = atom_ctx.nlist.block_type(driver_blk_id); + auto driver_blk_type = atom_nlist.block_type(driver_blk_id); // TODO: support multi-clock primitives. 
// If the driver block is a .input block, this assertion should not @@ -1141,7 +1173,7 @@ static AtomBlockId get_driving_block(const AtomBlockId block_id, const t_model_p VTR_ASSERT(pin_number == 1 || (pin_number == 0 && driver_blk_type == AtomBlockType::INPAD)); } - return atom_ctx.nlist.net_driver_block(net_id); + return atom_nlist.net_driver_block(net_id); } } @@ -1151,11 +1183,11 @@ static AtomBlockId get_driving_block(const AtomBlockId block_id, const t_model_p static void print_pack_molecules(const char* fname, const t_pack_patterns* list_of_pack_patterns, const int num_pack_patterns, - const t_pack_molecule* list_of_molecules) { + const t_pack_molecule* list_of_molecules, + const AtomNetlist& atom_nlist) { int i; FILE* fp; const t_pack_molecule* list_of_molecules_current; - auto& atom_ctx = g_vpr_ctx.atom(); fp = std::fopen(fname, "w"); fprintf(fp, "# of pack patterns %d\n", num_pack_patterns); @@ -1174,7 +1206,7 @@ static void print_pack_molecules(const char* fname, if (list_of_molecules_current->type == MOLECULE_SINGLE_ATOM) { fprintf(fp, "\nmolecule type: atom\n"); fprintf(fp, "\tpattern index %d: atom block %s\n", i, - atom_ctx.nlist.block_name(list_of_molecules_current->atom_block_ids[0]).c_str()); + atom_nlist.block_name(list_of_molecules_current->atom_block_ids[0]).c_str()); } else if (list_of_molecules_current->type == MOLECULE_FORCED_PACK) { fprintf(fp, "\nmolecule type: %s\n", list_of_molecules_current->pack_pattern->name); @@ -1185,7 +1217,7 @@ static void print_pack_molecules(const char* fname, } else { fprintf(fp, "\tpattern index %d: atom block %s", i, - atom_ctx.nlist.block_name(list_of_molecules_current->atom_block_ids[i]).c_str()); + atom_nlist.block_name(list_of_molecules_current->atom_block_ids[i]).c_str()); if (list_of_molecules_current->pack_pattern->root_block->block_id == i) { fprintf(fp, " root node\n"); } else { @@ -1203,15 +1235,15 @@ static void print_pack_molecules(const char* fname, } /* Search through all primitives and return 
the lowest cost primitive that fits this atom block */ -static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block(const AtomBlockId blk_id) { +static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block(const AtomBlockId blk_id, + const std::vector& logical_block_types) { float cost, best_cost; t_pb_graph_node *current, *best; - auto& device_ctx = g_vpr_ctx.device(); best_cost = UNDEFINED; best = nullptr; current = nullptr; - for (const auto& type : device_ctx.logical_block_types) { + for (const t_logical_block_type& type : logical_block_types) { cost = UNDEFINED; current = get_expected_lowest_cost_primitive_for_atom_block_in_pb_graph_node(blk_id, type.pb_graph_head, &cost); if (cost != UNDEFINED) { @@ -1301,17 +1333,18 @@ static int compare_pack_pattern(const t_pack_patterns* pattern_a, const t_pack_p * block_index: index of current atom * list_of_pack_pattern: ptr to current chain pattern */ -static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern) { +static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, + const t_pack_patterns* list_of_pack_patterns, + const std::multimap& atom_molecules, + const AtomNetlist& atom_nlist) { AtomBlockId new_root_blk_id; t_pb_graph_pin* root_ipin; t_pb_graph_node* root_pb_graph_node; t_model_ports* model_port; - auto& atom_ctx = g_vpr_ctx.atom(); - - VTR_ASSERT(list_of_pack_pattern->is_chain == true); - VTR_ASSERT(list_of_pack_pattern->chain_root_pins.size()); - root_ipin = list_of_pack_pattern->chain_root_pins[0][0]; + VTR_ASSERT(list_of_pack_patterns->is_chain == true); + VTR_ASSERT(list_of_pack_patterns->chain_root_pins.size()); + root_ipin = list_of_pack_patterns->chain_root_pins[0][0]; root_pb_graph_node = root_ipin->parent_node; if (primitive_type_feasible(blk_id, root_pb_graph_node->pb_type) == false) { @@ -1322,7 +1355,7 @@ static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const model_port 
= root_ipin->port->model_port; // find the block id of the atom block driving the input of this block - AtomBlockId driver_blk_id = atom_ctx.nlist.find_atom_pin_driver(blk_id, model_port, root_ipin->pin_number); + AtomBlockId driver_blk_id = atom_nlist.find_atom_pin_driver(blk_id, model_port, root_ipin->pin_number); // if there is no driver block for this net // then it is the furthest up the chain @@ -1330,7 +1363,7 @@ static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const return blk_id; } // check if driver atom is already packed - auto rng = atom_ctx.atom_molecules.equal_range(driver_blk_id); + auto rng = atom_molecules.equal_range(driver_blk_id); bool rng_empty = (rng.first == rng.second); if (!rng_empty) { /* Driver is used/invalid, so current block is the furthest up the chain, return it */ @@ -1338,7 +1371,7 @@ static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const } // didn't find furthest atom up the chain, keep searching further up the chain - new_root_blk_id = find_new_root_atom_for_chain(driver_blk_id, list_of_pack_pattern); + new_root_blk_id = find_new_root_atom_for_chain(driver_blk_id, list_of_pack_patterns, atom_molecules, atom_nlist); if (!new_root_blk_id) { return blk_id; @@ -1561,30 +1594,6 @@ static void update_chain_root_pins(t_pack_patterns* chain_pattern, chain_pattern->chain_root_pins = primitive_input_pins; } -/** - * Find the next primitive input pin connected to the given cluster_input_pin. 
- * Following edges that are annotated with pack_pattern index - */ -static t_pb_graph_pin* get_connected_primitive_pin(const t_pb_graph_pin* cluster_input_pin, const int pack_pattern) { - for (int iedge = 0; iedge < cluster_input_pin->num_output_edges; iedge++) { - const auto& output_edge = cluster_input_pin->output_edges[iedge]; - // if edge is annotated with pack pattern or its pack pattern could be inferred - if (output_edge->annotated_with_pattern(pack_pattern) || output_edge->infer_pattern) { - for (int ipin = 0; ipin < output_edge->num_output_pins; ipin++) { - if (output_edge->output_pins[ipin]->is_primitive_pin()) { - return output_edge->output_pins[ipin]; - } - return get_connected_primitive_pin(output_edge->output_pins[ipin], pack_pattern); - } - } - } - - // primitive input pin should always - // be found when using this function - VTR_ASSERT(false); - return nullptr; -} - /** * This function takes a pin as an input an does a depth first search on all the output edges * of this pin till it finds all the primitive input pins connected to this pin. For example, @@ -1623,28 +1632,29 @@ static void get_all_connected_primitive_pins(const t_pb_graph_pin* cluster_input * The second one should should be the molecule directly after that one * and so on. 
*/ -static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule) { +static void init_molecule_chain_info(const AtomBlockId blk_id, + t_pack_molecule* molecule, + const std::multimap &atom_molecules, + const AtomNetlist& atom_nlist) { // the input molecule to this function should have a pack // pattern assigned to it and the input block should be valid VTR_ASSERT(molecule->pack_pattern && blk_id); - auto& atom_ctx = g_vpr_ctx.atom(); - auto root_ipin = molecule->pack_pattern->chain_root_pins[0][0]; auto model_pin = root_ipin->port->model_port; auto pin_bit = root_ipin->pin_number; // find the atom driving the chain input pin of this atom - auto driver_atom_id = atom_ctx.nlist.find_atom_pin_driver(blk_id, model_pin, pin_bit); + auto driver_atom_id = atom_nlist.find_atom_pin_driver(blk_id, model_pin, pin_bit); // find the molecule this driver atom is mapped to - auto itr = atom_ctx.atom_molecules.find(driver_atom_id); + auto itr = atom_molecules.find(driver_atom_id); // if this is the first molecule to be created for this chain // initialize the chain info data structure. This is the case // if either there is no driver to the block input pin or // if the driver is not part of a molecule - if (!driver_atom_id || itr == atom_ctx.atom_molecules.end()) { + if (!driver_atom_id || itr == atom_molecules.end()) { // allocate chain info molecule->chain_info = std::make_shared(); // this is not the first molecule to be created for this chain @@ -1681,3 +1691,79 @@ static void print_chain_starting_points(t_pack_patterns* chain_pattern) { VTR_LOG("\n"); } + +/** + * This function frees the linked list of pack molecules. 
+ */ +static void free_pack_molecules(t_pack_molecule* list_of_pack_molecules) { + t_pack_molecule* cur_pack_molecule = list_of_pack_molecules; + while (cur_pack_molecule != nullptr) { + cur_pack_molecule = list_of_pack_molecules->next; + delete list_of_pack_molecules; + list_of_pack_molecules = cur_pack_molecule; + } +} + +void Prepacker::init(const AtomNetlist& atom_nlist, const std::vector& logical_block_types) { + VTR_ASSERT(list_of_pack_molecules == nullptr && "Prepacker cannot be initialized twice."); + + // Allocate the pack patterns from the logical block types. + list_of_pack_patterns = alloc_and_load_pack_patterns(logical_block_types); + // Use the pack patterns to allocate and load the pack molecules. + std::multimap atom_molecules_multimap; + expected_lowest_cost_pb_gnode.resize(atom_nlist.blocks().size(), nullptr); + list_of_pack_molecules = alloc_and_load_pack_molecules(list_of_pack_patterns.data(), + expected_lowest_cost_pb_gnode, + list_of_pack_patterns.size(), + atom_molecules_multimap, + atom_nlist, + logical_block_types); + + // The multimap is a legacy thing. Since blocks can be part of multiple pack + // patterns, during prepacking a block may be contained within multiple + // molecules. However, by the end of prepacking, molecules should be + // combined such that each block is contained in one and only one molecule. + atom_molecules.resize(atom_nlist.blocks().size(), nullptr); + for (AtomBlockId blk_id : atom_nlist.blocks()) { + // Every atom block should be packed into a single molecule (no more + // or less). 
+ VTR_ASSERT(atom_molecules_multimap.count(blk_id) == 1); + atom_molecules[blk_id] = atom_molecules_multimap.find(blk_id)->second; + } +} + +t_molecule_stats Prepacker::calc_max_molecule_stats(const AtomNetlist& atom_nlist) const { + t_molecule_stats max_molecules_stats; + t_pack_molecule* molecule_head = list_of_pack_molecules; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + //Calculate per-molecule statistics + (void)atom_nlist; + t_molecule_stats cur_molecule_stats = calc_molecule_stats(cur_molecule, atom_nlist); + + //Record the maximums (member-wise) over all molecules + max_molecules_stats.num_blocks = std::max(max_molecules_stats.num_blocks, cur_molecule_stats.num_blocks); + + max_molecules_stats.num_pins = std::max(max_molecules_stats.num_pins, cur_molecule_stats.num_pins); + max_molecules_stats.num_input_pins = std::max(max_molecules_stats.num_input_pins, cur_molecule_stats.num_input_pins); + max_molecules_stats.num_output_pins = std::max(max_molecules_stats.num_output_pins, cur_molecule_stats.num_output_pins); + + max_molecules_stats.num_used_ext_pins = std::max(max_molecules_stats.num_used_ext_pins, cur_molecule_stats.num_used_ext_pins); + max_molecules_stats.num_used_ext_inputs = std::max(max_molecules_stats.num_used_ext_inputs, cur_molecule_stats.num_used_ext_inputs); + max_molecules_stats.num_used_ext_outputs = std::max(max_molecules_stats.num_used_ext_outputs, cur_molecule_stats.num_used_ext_outputs); + } + + return max_molecules_stats; +} + +void Prepacker::reset() { + // When the prepacker is reset (or destroyed), clean up the internal data + // members. + free_list_of_pack_patterns(list_of_pack_patterns); + free_pack_molecules(list_of_pack_molecules); + // Reset everything to default state. 
+ list_of_pack_patterns.clear(); + list_of_pack_molecules = nullptr; + atom_molecules.clear(); + expected_lowest_cost_pb_gnode.clear(); +} + diff --git a/vpr/src/pack/prepack.h b/vpr/src/pack/prepack.h index de29a40436b..e6ac79cd425 100644 --- a/vpr/src/pack/prepack.h +++ b/vpr/src/pack/prepack.h @@ -1,21 +1,198 @@ /* - * Prepacking: Group together technology-mapped netlist blocks before packing. This gives hints to the packer on what groups of blocks to keep together during packing. - * Primary use 1) "Forced" packs (eg LUT+FF pair) - * 2) Carry-chains + * Prepacking: Group together technology-mapped netlist blocks before packing. + * This gives hints to the packer on what groups of blocks to keep together + * during packing. + * + * Primary uses: 1) "Forced" packs (eg LUT+FF pair) + * 2) Carry-chains */ #ifndef PREPACK_H #define PREPACK_H -#include -#include "atom_netlist_fwd.h" -#include "arch_types.h" + +#include #include "vpr_types.h" +#include "vtr_assert.h" +#include "vtr_vector.h" + +class AtomNetlist; +class AtomBlockId; +struct t_molecule_stats; +struct t_logical_block_type; + +/** + * @brief Class that performs prepacking. + * + * This class maintains the prepacking state, allowing the use of molecules + * (prepacked atoms) while this object exists. After prepacking, every atom will + * be part of a molecule (with a large number being part of single-atom + * molecules). + * + * Molecules currently come from pack patterns in the architecture file. For + * example, a 3-bit carry chain in most architectures would turn into a molecule + * containing the 3 atoms forming the carry chain. + * + * To use the prepacker, call the init method with a complete atom netlist. + * Then maintain this object (do not reset or destroy it) so long as the + * molecules are needed. + * + * // Initialize device and atom netlist + * // ... + * Prepacker prepacker; + * prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); + * // ... + * // Use the prepacked molecules. 
+ * // ... + * prepacker.reset(); // Or if the prepacker object is destroyed. + * // Prepacked molecules can no longer be used beyond this point. + * + */ +class Prepacker { +public: + // The constructor is default, the init method performs prepacking. + Prepacker() = default; + + // This class maintains pointers to internal data structures, and as such + // should not be copied or moved (prevents unsafe accesses). + Prepacker(const Prepacker&) = delete; + Prepacker& operator=(const Prepacker&) = delete; + + /** + * @brief Performs prepacking. + * + * Initializes the prepacker by performing prepacking and allocating the + * necessary data structures. + * + * @param atom_nlist The atom netlist to prepack. + * @param logical_block_types A list of the logical block types on the device. + */ + void init(const AtomNetlist& atom_nlist, const std::vector &logical_block_types); + + /** + * @brief Get the cluster molecule containing the given atom block. + * + * @param blk_id The atom block to get the molecule of. + */ + inline t_pack_molecule* get_atom_molecule(AtomBlockId blk_id) const { + // Safety debug to ensure the blk is valid and has a molecule entry. + VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_molecules.size()); + // Safety debug to ensure the molecule is valid + VTR_ASSERT_DEBUG(atom_molecules[blk_id] != nullptr); + return atom_molecules[blk_id]; + } + + /** + * @brief Get the expected lowest cost physical block graph node for the + * given atom block. + * + * @param blk_id The atom block to get the pb graph node of. + */ + inline t_pb_graph_node* get_expected_lowest_cost_pb_gnode(AtomBlockId blk_id) const { + // Safety debug to ensure the blk is valid and has an entry. + VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < expected_lowest_cost_pb_gnode.size()); + // Ensure the entry is valid. 
+ VTR_ASSERT(expected_lowest_cost_pb_gnode[blk_id] != nullptr); + return expected_lowest_cost_pb_gnode[blk_id]; + } + + /** + * @brief Returns the total number of molecules in the prepacker. + */ + inline size_t get_num_molecules() const { + size_t num_molecules = 0; + t_pack_molecule* molecule_head = list_of_pack_molecules; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + ++num_molecules; + } + return num_molecules; + } + + /** + * @brief Returns all of the molecules as a vector. + */ + inline std::vector get_molecules_vector() const { + std::vector molecules; + t_pack_molecule* molecule_head = list_of_pack_molecules; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + molecules.push_back(cur_molecule); + } + return molecules; + } + + /** + * @brief Marks all of the molecules as valid. + * + * Within clustering, the valid flag of a molecule is used to signify if any + * of the atoms in the molecule has been packed into a cluster yet or not. + * If any atom in the molecule has been packed, the flag will be false. + * + * This method is used before clustering to mark all the molecules as + * unpacked. + */ + inline void mark_all_molecules_valid() { + t_pack_molecule* molecule_head = list_of_pack_molecules; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + cur_molecule->valid = true; + } + } + + /** + * @brief Calculates maximum molecule statistics across all molecules. + */ + t_molecule_stats calc_max_molecule_stats(const AtomNetlist& netlist) const; + + /** + * @brief Gets the largest number of blocks (atoms) that any molecule contains. 
+ */ + inline size_t get_max_molecule_size() const { + size_t max_molecule_size = 1; + t_pack_molecule* molecule_head = list_of_pack_molecules; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + max_molecule_size = std::max(max_molecule_size, cur_molecule->num_blocks); + } + return max_molecule_size; + } + + /** + * @brief Resets the prepacker object. Clearing all state. + * + * This resets the prepacker, allowing it to prepack again and also freeing + * any state. + */ + void reset(); + + /// @brief Destructor of the prepacker class. Calls the reset method. + ~Prepacker() { reset(); } + +private: + /** + * @brief A linked list of all the packing molecules that are loaded in + * prepacking stage. + * + * All of the molecules in the prepacker are allocated into this linked list + * and must be freed eventually. + * + * TODO: Should use a vtr::vector instead of a linked list for storage. Then + * instead of pointers, IDs can be used to manipulate the molecules + * which would be safer. + */ + t_pack_molecule* list_of_pack_molecules = nullptr; + + /** + * @brief The molecules associated with each atom block. + * + * This vector is loaded in the init method and cleared in the reset method. + * The pointers in this vector are shared with list_of_pack_molecules. + */ + vtr::vector atom_molecules; + + /// @brief A vector of the expected lowest cost physical block graph node. + vtr::vector expected_lowest_cost_pb_gnode; -std::vector alloc_and_load_pack_patterns(); -void free_list_of_pack_patterns(std::vector& list_of_pack_patterns); -void free_pack_pattern(t_pack_patterns* pack_pattern); + /// @brief A list of the pack patterns used for prepacking. I think the + /// molecules keep pointers to this vector, so this needs to remain + /// for the lifetime of the molecules. 
+ std::vector list_of_pack_patterns; +}; -t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, - std::unordered_map& expected_lowest_cost_pb_gnode, - const int num_packing_patterns); #endif diff --git a/vpr/src/timing/PreClusterDelayCalculator.h b/vpr/src/timing/PreClusterDelayCalculator.h index 54e97e66a1a..a4177fd6126 100644 --- a/vpr/src/timing/PreClusterDelayCalculator.h +++ b/vpr/src/timing/PreClusterDelayCalculator.h @@ -4,6 +4,7 @@ #include "tatum/Time.hpp" #include "tatum/delay_calc/DelayCalculator.hpp" +#include "tatum/TimingGraph.hpp" #include "vpr_error.h" #include "vpr_utils.h" @@ -11,17 +12,18 @@ #include "atom_netlist.h" #include "atom_lookup.h" #include "physical_types.h" +#include "prepack.h" class PreClusterDelayCalculator : public tatum::DelayCalculator { public: PreClusterDelayCalculator(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, float intercluster_net_delay, - std::unordered_map expected_lowest_cost_pb_gnode) + const Prepacker& prepacker) : netlist_(netlist) , netlist_lookup_(netlist_lookup) , inter_cluster_net_delay_(intercluster_net_delay) - , block_to_pb_gnode_(expected_lowest_cost_pb_gnode) { + , prepacker_(prepacker) { //nop } @@ -132,11 +134,7 @@ class PreClusterDelayCalculator : public tatum::DelayCalculator { const t_pb_graph_pin* find_pb_graph_pin(const AtomPinId pin) const { AtomBlockId blk = netlist_.pin_block(pin); - auto iter = block_to_pb_gnode_.find(blk); - VTR_ASSERT(iter != block_to_pb_gnode_.end()); - - const t_pb_graph_node* pb_gnode = iter->second; - VTR_ASSERT(pb_gnode); + const t_pb_graph_node* pb_gnode = prepacker_.get_expected_lowest_cost_pb_gnode(blk); AtomPortId port = netlist_.pin_port(pin); const t_model_ports* model_port = netlist_.port_model(port); @@ -165,7 +163,7 @@ class PreClusterDelayCalculator : public tatum::DelayCalculator { const AtomNetlist& netlist_; const AtomLookup& netlist_lookup_; const float inter_cluster_net_delay_; - const std::unordered_map 
block_to_pb_gnode_; + const Prepacker& prepacker_; }; #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index f4c1955e84b..0285a42a5da 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1556,30 +1556,27 @@ void revalid_molecules(const t_pb* pb) { atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); atom_ctx.lookup.set_atom_pb(blk_id, nullptr); - auto rng = atom_ctx.atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* cur_molecule = kv.second; - if (cur_molecule->valid == false) { - int i; - for (i = 0; i < get_array_size_of_molecule(cur_molecule); i++) { - if (cur_molecule->atom_block_ids[i]) { - if (atom_ctx.lookup.atom_clb(cur_molecule->atom_block_ids[i]) != ClusterBlockId::INVALID()) { - break; - } + t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (cur_molecule->valid == false) { + int i; + for (i = 0; i < get_array_size_of_molecule(cur_molecule); i++) { + if (cur_molecule->atom_block_ids[i]) { + if (atom_ctx.lookup.atom_clb(cur_molecule->atom_block_ids[i]) != ClusterBlockId::INVALID()) { + break; } } - /* All atom blocks are open for this molecule, place back in queue */ - if (i == get_array_size_of_molecule(cur_molecule)) { - cur_molecule->valid = true; - // when invalidating a molecule check if it's a chain molecule - // that is part of a long chain. 
If so, check if this molecule - // have modified the chain_id value based on the stale packing - // then reset the chain id and the first packed molecule pointer - // this is packing is being reset - if (cur_molecule->is_chain() && cur_molecule->chain_info->is_long_chain && cur_molecule->chain_info->first_packed_molecule == cur_molecule) { - cur_molecule->chain_info->first_packed_molecule = nullptr; - cur_molecule->chain_info->chain_id = -1; - } + } + /* All atom blocks are open for this molecule, place back in queue */ + if (i == get_array_size_of_molecule(cur_molecule)) { + cur_molecule->valid = true; + // when invalidating a molecule check if it's a chain molecule + // that is part of a long chain. If so, check if this molecule + // have modified the chain_id value based on the stale packing + // then reset the chain id and the first packed molecule pointer + // this is packing is being reset + if (cur_molecule->is_chain() && cur_molecule->chain_info->is_long_chain && cur_molecule->chain_info->first_packed_molecule == cur_molecule) { + cur_molecule->chain_info->first_packed_molecule = nullptr; + cur_molecule->chain_info->chain_id = -1; } } } diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 89ffce30577..1bc208bf3ba 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -193,8 +193,7 @@ TEST_CASE("connection_router", "[vpr]") { vpr_setup); auto& atom_ctx = g_vpr_ctx.mutable_atom(); - free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); - atom_ctx.atom_molecules.clear(); + atom_ctx.prepacker.reset(); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index 2222f861eac..be4bd45f045 100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -37,8 +37,7 @@ void do_vpr_flow(const char* input_unc_opt, const char* output_unc_opt) { auto& atom_ctx = g_vpr_ctx.mutable_atom(); - 
free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); - atom_ctx.atom_molecules.clear(); + atom_ctx.prepacker.reset(); REQUIRE(flow_succeeded == true); } diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp index 4945c56c001..da0b4c8b21c 100644 --- a/vpr/test/test_vpr.cpp +++ b/vpr/test/test_vpr.cpp @@ -171,8 +171,7 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { vpr_free_all(arch, vpr_setup); auto& atom_ctx = g_vpr_ctx.mutable_atom(); - free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); - atom_ctx.atom_molecules.clear(); + atom_ctx.prepacker.reset(); } REQUIRE(src_inode != -1); @@ -236,8 +235,7 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { vpr_free_all(arch, vpr_setup); auto& atom_ctx = g_vpr_ctx.mutable_atom(); - free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); - atom_ctx.atom_molecules.clear(); + atom_ctx.prepacker.reset(); } } // namespace