diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8404082f25e..8035b7e0834 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -141,6 +141,11 @@ jobs: params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on', suite: 'vtr_reg_basic' }, + { + name: 'Basic with highest assertion level', + params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=4 -DWITH_BLIFEXPLORER=on', + suite: 'vtr_reg_basic' + }, { name: 'Basic_odin', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DWITH_PARMYS=OFF -DWITH_ODIN=on', diff --git a/.gitignore b/.gitignore index 640bd34b00a..3582f2ee54d 100644 --- a/.gitignore +++ b/.gitignore @@ -144,6 +144,7 @@ tags # .vscode .history +.cache #eclipse project .project @@ -153,4 +154,4 @@ tags .idea cmake-build-debug cmake-build-release -/.metadata/ \ No newline at end of file +/.metadata/ diff --git a/libs/libarchfpga/src/parse_switchblocks.cpp b/libs/libarchfpga/src/parse_switchblocks.cpp index 182d194c5d7..c0b55bcb7fd 100644 --- a/libs/libarchfpga/src/parse_switchblocks.cpp +++ b/libs/libarchfpga/src/parse_switchblocks.cpp @@ -65,6 +65,9 @@ static void parse_comma_separated_wire_points(const char* ch, std::vector */ void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiutil::loc_data& loc_data) { @@ -300,34 +367,8 @@ void read_sb_switchfuncs(pugi::xml_node Node, t_switchblock_inf* sb, const pugiu func_formula = get_attribute(SubElem, "formula", loc_data).as_string(nullptr); /* go through all the possible cases of func_type */ - if (0 == strcmp(func_type, "lt")) { - conn.set_sides(LEFT, TOP); - } else if (0 == strcmp(func_type, "lr")) { - conn.set_sides(LEFT, RIGHT); - } else if (0 == strcmp(func_type, "lb")) { - conn.set_sides(LEFT, BOTTOM); - } else if (0 == strcmp(func_type, "tl")) { - conn.set_sides(TOP, LEFT); - } else if (0 == strcmp(func_type, "tb")) { - conn.set_sides(TOP, BOTTOM); - } 
else if (0 == strcmp(func_type, "tr")) { - conn.set_sides(TOP, RIGHT); - } else if (0 == strcmp(func_type, "rt")) { - conn.set_sides(RIGHT, TOP); - } else if (0 == strcmp(func_type, "rl")) { - conn.set_sides(RIGHT, LEFT); - } else if (0 == strcmp(func_type, "rb")) { - conn.set_sides(RIGHT, BOTTOM); - } else if (0 == strcmp(func_type, "bl")) { - conn.set_sides(BOTTOM, LEFT); - } else if (0 == strcmp(func_type, "bt")) { - conn.set_sides(BOTTOM, TOP); - } else if (0 == strcmp(func_type, "br")) { - conn.set_sides(BOTTOM, RIGHT); - } else { - /* unknown permutation function */ - archfpga_throw(__FILE__, __LINE__, "Unknown permutation function specified: %s\n", func_type); - } + set_switch_func_type(conn, func_type); + func_ptr = &(sb->permutation_map[conn]); /* Here we load the specified switch function(s) */ @@ -404,8 +445,8 @@ static void check_bidir_switchblock(const t_permutation_map* permutation_map) { SB_Side_Connection conn; /* iterate over all combinations of from_side -> to side */ - for (e_side from_side : {TOP, RIGHT, BOTTOM, LEFT}) { - for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side from_side : TOTAL_2D_SIDES) { + for (e_side to_side : TOTAL_2D_SIDES) { /* can't connect a switchblock side to itself */ if (from_side == to_side) { continue; diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 949024ada33..628659b1737 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -176,10 +176,17 @@ enum e_side : unsigned char { RIGHT = 1, BOTTOM = 2, LEFT = 3, - NUM_SIDES + NUM_2D_SIDES = 4, + ABOVE = 5, + UNDER = 7, + NUM_3D_SIDES = 6, }; -constexpr std::array SIDES = {{TOP, RIGHT, BOTTOM, LEFT}}; //Set of all side orientations -constexpr std::array SIDE_STRING = {{"TOP", "RIGHT", "BOTTOM", "LEFT"}}; //String versions of side orientations + +constexpr std::array TOTAL_2D_SIDES = {{TOP, RIGHT, BOTTOM, LEFT}}; //Set of all side orientations +constexpr std::array 
TOTAL_2D_SIDE_STRINGS = {{"TOP", "RIGHT", "BOTTOM", "LEFT"}}; //String versions of side orientations + +constexpr std::array TOTAL_3D_SIDES = {{TOP, RIGHT, BOTTOM, LEFT, ABOVE, UNDER}}; //Set of all side orientations including different layers +constexpr std::array TOTAL_3D_SIDE_STRINGS = {{"TOP", "RIGHT", "BOTTOM", "LEFT", "ABOVE", "UNDER"}}; //String versions of side orientations including different layers /* pin location distributions */ enum class e_pin_location_distr { diff --git a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp index 828f935369f..d1062ae0480 100644 --- a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp +++ b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp @@ -480,7 +480,7 @@ struct ArchReader { type.pin_height_offset.resize(type.num_pins, 0); type.pinloc.resize({1, 1, 4}, std::vector(type.num_pins, false)); - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { for (int pin = 0; pin < type.num_pins; pin++) { type.pinloc[0][0][side][pin] = true; type.pin_width_offset[pin] = 0; diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index f3a9ffaa683..1b002195862 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -579,7 +579,7 @@ static void LoadPinLoc(pugi::xml_node Locations, int num_sides = 4 * (type->width * type->height); int side_index = 0; int count = 0; - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { for (int pin_offset = 0; pin_offset < (type->num_pins / num_sides) + 1; ++pin_offset) { @@ -604,7 +604,7 @@ static void LoadPinLoc(pugi::xml_node Locations, while (ipin < type->num_pins) { for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; 
++height) { - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { if (((width == 0 && side == LEFT) || (height == type->height - 1 && side == TOP) || (width == type->width - 1 && side == RIGHT) @@ -645,7 +645,7 @@ static void LoadPinLoc(pugi::xml_node Locations, while (ipin < input_pins.size()) { for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { if (ipin < input_pins.size()) { //Pins still to allocate @@ -668,7 +668,7 @@ static void LoadPinLoc(pugi::xml_node Locations, while (ipin < output_pins.size()) { for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { if (((width == 0 && side == LEFT) || (height == type->height - 1 && side == TOP) || (width == type->width - 1 && side == RIGHT) @@ -699,8 +699,8 @@ static void LoadPinLoc(pugi::xml_node Locations, for (int layer = 0; layer < num_of_avail_layer; ++layer) { for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { - for (const auto& token : pin_locs->assignments[sub_tile_index][width][height][layer][side]) { + for (e_side side : TOTAL_2D_SIDES) { + for (auto token : pin_locs->assignments[sub_tile_index][width][height][layer][side]) { auto pin_range = ProcessPinString(Locations, &sub_tile, token.c_str(), @@ -3393,9 +3393,9 @@ static void ProcessPinLocations(pugi::xml_node Locations, for (int l = 0; l < num_of_avail_layer; ++l) { for (int w = 0; w < PhysicalTileType->width; ++w) { for (int h = 0; h < PhysicalTileType->height; ++h) { - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { - for (const auto& token : pin_locs->assignments[sub_tile_index][w][h][l][side]) { - InstPort inst_port(token); + 
for (e_side side : TOTAL_2D_SIDES) { + for (auto token : pin_locs->assignments[sub_tile_index][w][h][l][side]) { + InstPort inst_port(token.c_str()); //A pin specification should contain only the block name, and not any instance count information if (inst_port.instance_low_index() != InstPort::UNSPECIFIED || inst_port.instance_high_index() != InstPort::UNSPECIFIED) { @@ -4766,9 +4766,9 @@ static int find_switch_by_name(const t_arch& arch, const std::string& switch_nam } static e_side string_to_side(const std::string& side_str) { - e_side side = NUM_SIDES; + e_side side = NUM_2D_SIDES; if (side_str.empty()) { - side = NUM_SIDES; + side = NUM_2D_SIDES; } else if (side_str == "left") { side = LEFT; } else if (side_str == "right") { diff --git a/libs/librrgraph/src/base/check_rr_graph.cpp b/libs/librrgraph/src/base/check_rr_graph.cpp index b49af0e3324..06617fa620a 100644 --- a/libs/librrgraph/src/base/check_rr_graph.cpp +++ b/libs/librrgraph/src/base/check_rr_graph.cpp @@ -280,12 +280,12 @@ void check_rr_graph(const RRGraphView& rr_graph, rr_graph.node_layer(rr_node)}); std::string pin_name = block_type_pin_index_to_name(block_type, rr_graph.node_pin_num(rr_node), is_flat); /* Print error messages for all the sides that a node may appear */ - for (const e_side& node_side : SIDES) { + for (const e_side& node_side : TOTAL_2D_SIDES) { if (!rr_graph.is_node_on_specific_side(rr_node, node_side)) { continue; } VTR_LOG_ERROR("in check_rr_graph: node %d (%s) at (%d,%d) block=%s side=%s pin=%s has no fanin.\n", - inode, rr_graph.node_type_string(rr_node), rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node), block_type->name, SIDE_STRING[node_side], pin_name.c_str()); + inode, rr_graph.node_type_string(rr_node), rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node), block_type->name, TOTAL_2D_SIDE_STRINGS[node_side], pin_name.c_str()); } } } else { @@ -498,9 +498,13 @@ void check_rr_node(const RRGraphView& rr_graph, tracks_per_node = ((rr_type == CHANX) ? 
chan_width.x_list[ylow] : chan_width.y_list[xlow]); } - if (ptc_num >= nodes_per_chan) { - VPR_ERROR(VPR_ERROR_ROUTE, - "in check_rr_node: inode %d (type %d) has a ptc_num of %d.\n", inode, rr_type, ptc_num); + //if a chanx/chany has length 0, it means it is used to connect different dice together + //hence, the ptc number can be larger than nodes_per_chan + if(xlow != xhigh || ylow != yhigh) { + if (ptc_num >= nodes_per_chan) { + VPR_ERROR(VPR_ERROR_ROUTE, + "in check_rr_node: inode %d (type %d) has a ptc_num of %d.\n", inode, rr_type, ptc_num); + } } if (capacity != tracks_per_node) { diff --git a/libs/librrgraph/src/base/rr_graph_builder.cpp b/libs/librrgraph/src/base/rr_graph_builder.cpp index 1ef61ec55da..565c99e3f75 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.cpp +++ b/libs/librrgraph/src/base/rr_graph_builder.cpp @@ -39,18 +39,18 @@ void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) { case SOURCE: case SINK: case CHANY: - node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node, node_layer, ix, iy, node_type, node_ptc_num, TOTAL_2D_SIDES[0]); break; case CHANX: /* Currently need to swap x and y for CHANX because of chan, seg convention * TODO: Once the builders is reworked for use consistent (x, y) convention, * the following swapping can be removed */ - node_lookup_.add_node(node,node_layer, iy, ix, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node, node_layer, iy, ix, node_type, node_ptc_num, TOTAL_2D_SIDES[0]); break; case OPIN: case IPIN: - for (const e_side& side : SIDES) { + for (const e_side& side : TOTAL_2D_SIDES) { if (node_storage_.is_node_on_specific_side(node, side)) { node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, side); } diff --git a/libs/librrgraph/src/base/rr_graph_obj.cpp b/libs/librrgraph/src/base/rr_graph_obj.cpp index dd819770783..fae02caf26e 100644 --- a/libs/librrgraph/src/base/rr_graph_obj.cpp +++ 
b/libs/librrgraph/src/base/rr_graph_obj.cpp @@ -812,7 +812,7 @@ RRNodeId RRGraph::create_node(const t_rr_type& type) { node_ptc_nums_.push_back(-1); node_cost_indices_.push_back(-1); node_directions_.push_back(Direction::NONE); - node_sides_.push_back(NUM_SIDES); + node_sides_.push_back(NUM_2D_SIDES); node_Rs_.push_back(0.); node_Cs_.push_back(0.); @@ -1133,7 +1133,7 @@ void RRGraph::build_fast_node_lookup() const { if (node_type(node) == OPIN || node_type(node) == IPIN) { iside = node_side(node); } else { - iside = NUM_SIDES; + iside = NUM_2D_SIDES; } if (iside >= node_lookup_[x][y][itype][ptc].size()) { diff --git a/libs/librrgraph/src/base/rr_graph_obj.h b/libs/librrgraph/src/base/rr_graph_obj.h index 3b8f86c928a..5ad31ba7f01 100644 --- a/libs/librrgraph/src/base/rr_graph_obj.h +++ b/libs/librrgraph/src/base/rr_graph_obj.h @@ -510,7 +510,7 @@ class RRGraph { /* Find the edges connecting two nodes */ std::vector find_edges(const RRNodeId& src_node, const RRNodeId& sink_node) const; /* Find a node with given features from internal fast look-up */ - RRNodeId find_node(const short& x, const short& y, const t_rr_type& type, const int& ptc, const e_side& side = NUM_SIDES) const; + RRNodeId find_node(const short& x, const short& y, const t_rr_type& type, const int& ptc, const e_side& side = NUM_2D_SIDES) const; /* Find the number of routing tracks in a routing channel with a given coordinate */ short chan_num_tracks(const short& x, const short& y, const t_rr_type& type) const; @@ -828,7 +828,7 @@ class RRGraph { bool dirty_ = false; /* Fast look-up to search a node by its type, coordinator and ptc_num - * Indexing of fast look-up: [0..xmax][0..ymax][0..NUM_TYPES-1][0..ptc_max][0..NUM_SIDES-1] + * Indexing of fast look-up: [0..xmax][0..ymax][0..NUM_TYPES-1][0..ptc_max][0..NUM_2D_SIDES-1] */ typedef std::vector>>>> NodeLookup; mutable NodeLookup node_lookup_; diff --git a/libs/librrgraph/src/base/rr_graph_storage.cpp b/libs/librrgraph/src/base/rr_graph_storage.cpp index 
a7c8ce471c8..8d8dcd5ac20 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.cpp +++ b/libs/librrgraph/src/base/rr_graph_storage.cpp @@ -618,13 +618,13 @@ const std::string& t_rr_graph_storage::node_direction_string(RRNodeId id) const } const char* t_rr_graph_storage::node_side_string(RRNodeId id) const { - for (const e_side& side : SIDES) { + for (const e_side& side : TOTAL_2D_SIDES) { if (is_node_on_specific_side(id, side)) { - return SIDE_STRING[side]; + return TOTAL_2D_SIDE_STRINGS[side]; } } /* Not found, return an invalid string*/ - return SIDE_STRING[NUM_SIDES]; + return TOTAL_2D_SIDE_STRINGS[NUM_2D_SIDES]; } void t_rr_graph_storage::set_node_layer(RRNodeId id, short layer) { @@ -771,10 +771,10 @@ void t_rr_graph_storage::add_node_side(RRNodeId id, e_side new_side) { if (node_type(id) != IPIN && node_type(id) != OPIN) { VTR_LOG_ERROR("Attempted to set RR node 'side' for non-channel type '%s'", node_type_string(id)); } - std::bitset side_bits = node_storage_[id].dir_side_.sides; + std::bitset side_bits = node_storage_[id].dir_side_.sides; side_bits[size_t(new_side)] = true; if (side_bits.to_ulong() > CHAR_MAX) { - VTR_LOG_ERROR("Invalid side '%s' to be added to rr node %u", SIDE_STRING[new_side], size_t(id)); + VTR_LOG_ERROR("Invalid side '%s' to be added to rr node %u", TOTAL_2D_SIDE_STRINGS[new_side], size_t(id)); } node_storage_[id].dir_side_.sides = static_cast(side_bits.to_ulong()); } diff --git a/libs/librrgraph/src/base/rr_graph_storage.h b/libs/librrgraph/src/base/rr_graph_storage.h index db791e4cda3..82c8f0b2326 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.h +++ b/libs/librrgraph/src/base/rr_graph_storage.h @@ -213,7 +213,7 @@ class t_rr_graph_storage { * Developers can easily use the following codes with more flexibility * * if (rr_graph.is_node_on_specific_side(id, side)) { - * const char* side_string = SIDE_STRING[side]; + * const char* side_string = TOTAL_2D_SIDE_STRINGS[side]; * } */ const char* node_side_string(RRNodeId id) const; 
@@ -769,7 +769,7 @@ class t_rr_graph_storage { rr_node_typename[node_data.type_]); } // Return a vector showing only the sides that the node appears - std::bitset side_tt = node_storage[id].dir_side_.sides; + std::bitset side_tt = node_storage[id].dir_side_.sides; return side_tt[size_t(side)]; } diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h index af5035099b5..b96c7a615b4 100644 --- a/libs/librrgraph/src/base/rr_graph_view.h +++ b/libs/librrgraph/src/base/rr_graph_view.h @@ -199,6 +199,9 @@ class RRGraphView { */ inline int node_length(RRNodeId node) const { VTR_ASSERT(node_type(node) == CHANX || node_type(node) == CHANY); + if(node_direction(node) == Direction::NONE){ + return 0; //length zero wire + } int length = 1 + node_xhigh(node) - node_xlow(node) + node_yhigh(node) - node_ylow(node); VTR_ASSERT_SAFE(length > 0); return length; @@ -264,11 +267,11 @@ class RRGraphView { int node_layer_num = node_layer(node); if (node_type(node) == OPIN || node_type(node) == IPIN) { coordinate_string += "side: ("; //add the side of the routing resource node - for (const e_side& node_side : SIDES) { + for (const e_side& node_side : TOTAL_2D_SIDES) { if (!is_node_on_specific_side(node, node_side)) { continue; } - coordinate_string += std::string(SIDE_STRING[node_side]) + ","; //add the side of the routing resource node + coordinate_string += std::string(TOTAL_2D_SIDE_STRINGS[node_side]) + ","; //add the side of the routing resource node } coordinate_string += ")"; //add the side of the routing resource node // For OPINs and IPINs the starting and ending coordinate are identical, so we can just arbitrarily assign the start to larger values diff --git a/libs/librrgraph/src/base/rr_node_types.h b/libs/librrgraph/src/base/rr_node_types.h index 3e9f89d4c82..3c3c3f91953 100644 --- a/libs/librrgraph/src/base/rr_node_types.h +++ b/libs/librrgraph/src/base/rr_node_types.h @@ -122,7 +122,7 @@ struct t_rr_rc_data { }; // This is the data 
type of fast lookups of an rr-node given an (rr_type, layer, x, y, and the side) -//[0..num_rr_types-1][0..num_layer-1][0..grid_width-1][0..grid_height-1][0..NUM_SIDES-1][0..max_ptc-1] +//[0..num_rr_types-1][0..num_layer-1][0..grid_width-1][0..grid_height-1][0..NUM_2D_SIDES-1][0..max_ptc-1] typedef std::array, 4>, NUM_RR_TYPES> t_rr_node_indices; #endif diff --git a/libs/librrgraph/src/base/rr_rc_data.cpp b/libs/librrgraph/src/base/rr_rc_data.cpp index fc24bf396c7..438f4c0633c 100644 --- a/libs/librrgraph/src/base/rr_rc_data.cpp +++ b/libs/librrgraph/src/base/rr_rc_data.cpp @@ -17,7 +17,7 @@ short find_create_rr_rc_data(const float R, const float C, std::vector create it + //Not found -> create it rr_rc_data.emplace_back(R, C); itr = --rr_rc_data.end(); //Iterator to inserted value diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp index 57e973c0726..6959659be8b 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp +++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp @@ -16,20 +16,20 @@ RRNodeId RRSpatialLookup::find_node(int layer, * - For other types of nodes, there is no need to define a side. However, a default value * is needed when store the node in the fast look-up data structure. * Here we just arbitrary use the first side of the SIDE vector as the default value. - * We may consider to use NUM_SIDES as the default value but it will cause an increase + * We may consider to use NUM_2D_SIDES as the default value but it will cause an increase * in the dimension of the fast look-up data structure. * Please note that in the add_node function, we should keep the SAME convention! 
*/ e_side node_side = side; if (type == IPIN || type == OPIN) { - VTR_ASSERT_MSG(side != NUM_SIDES, "IPIN/OPIN must specify desired side (can not be default NUM_SIDES)"); + VTR_ASSERT_MSG(side != NUM_2D_SIDES, "IPIN/OPIN must specify desired side (can not be default NUM_2D_SIDES)"); } else { VTR_ASSERT_SAFE(type != IPIN && type != OPIN); - node_side = SIDES[0]; + node_side = TOTAL_2D_SIDES[0]; } /* Pre-check: the layer, x, y, side and ptc should be non-negative numbers! Otherwise, return an invalid id */ - if ((layer < 0) || (x < 0) || (y < 0) || (node_side == NUM_SIDES) || (ptc < 0)) { + if ((layer < 0) || (x < 0) || (y < 0) || (node_side == NUM_2D_SIDES) || (ptc < 0)) { return RRNodeId::INVALID(); } @@ -191,9 +191,9 @@ std::vector RRSpatialLookup::find_nodes_at_all_sides(int layer, /* TODO: Consider to access the raw data like find_node() rather than calling find_node() many times, which hurts runtime */ if (rr_type == IPIN || rr_type == OPIN) { - indices.reserve(NUM_SIDES); + indices.reserve(NUM_2D_SIDES); //For pins, we need to look at all the sides of the current grid tile - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { RRNodeId rr_node_index = find_node(layer, x, y, rr_type, ptc, side); if (rr_node_index) { indices.push_back(rr_node_index); @@ -223,12 +223,12 @@ std::vector RRSpatialLookup::find_grid_nodes_at_all_sides(int layer, std::vector nodes; /* Reserve space to avoid memory fragmentation */ size_t num_nodes = 0; - for (e_side node_side : SIDES) { + for (e_side node_side : TOTAL_2D_SIDES) { num_nodes += find_nodes(layer,x, y, rr_type, node_side).size(); } nodes.reserve(num_nodes); - for (e_side node_side : SIDES) { + for (e_side node_side : TOTAL_2D_SIDES) { std::vector temp_nodes = find_nodes(layer,x, y, rr_type, node_side); nodes.insert(nodes.end(), temp_nodes.begin(), temp_nodes.end()); } @@ -245,7 +245,7 @@ void RRSpatialLookup::reserve_nodes(int layer, /* For non-IPIN/OPIN nodes, the side should always be the TOP side which 
follows the convention in find_node() API! */ if (type != IPIN && type != OPIN) { - VTR_ASSERT(side == SIDES[0]); + VTR_ASSERT(side == TOTAL_2D_SIDES[0]); } resize_nodes(layer, x, y, type, side); @@ -265,7 +265,7 @@ void RRSpatialLookup::add_node(RRNodeId node, /* For non-IPIN/OPIN nodes, the side should always be the TOP side which follows the convention in find_node() API! */ if (type != IPIN && type != OPIN) { - VTR_ASSERT(side == SIDES[0]); + VTR_ASSERT(side == TOTAL_2D_SIDES[0]); } resize_nodes(layer, x, y, type, side); @@ -293,7 +293,7 @@ bool RRSpatialLookup::remove_node(RRNodeId node, VTR_ASSERT_SAFE(y >= 0); VTR_ASSERT_SAFE(type != NUM_RR_TYPES); VTR_ASSERT_SAFE(ptc >= 0); - VTR_ASSERT_SAFE(side != NUM_SIDES); + VTR_ASSERT_SAFE(side != NUM_2D_SIDES); // Check if the node given is in the spatial lookup at the given indices if (type >= rr_node_indices_.size()) return false; diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.h b/libs/librrgraph/src/base/rr_spatial_lookup.h index bae254d8459..6a4ca5f1b1c 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.h +++ b/libs/librrgraph/src/base/rr_spatial_lookup.h @@ -73,7 +73,7 @@ class RRSpatialLookup { int y, t_rr_type type, int ptc, - e_side side = NUM_SIDES) const; + e_side side = NUM_2D_SIDES) const; /** * @brief Returns unique indices of the routing resource nodes in the bounds (xlow, ylow) to (xhigh, yhigh). @@ -96,7 +96,7 @@ class RRSpatialLookup { int yhigh, t_rr_type type, int ptc, - e_side side = NUM_SIDES) const; + e_side side = e_side::NUM_2D_SIDES) const; /** * @brief Returns the indices of the specified routing resource nodes, representing routing tracks in a channel. 
@@ -148,7 +148,7 @@ class RRSpatialLookup { int y, t_rr_type type, int num_nodes, - e_side side = SIDES[0]); + e_side side = TOTAL_2D_SIDES[0]); /** * @brief Register a node in the fast spatial lookup @@ -181,7 +181,7 @@ class RRSpatialLookup { int y, t_rr_type type, int ptc, - e_side side = SIDES[0]); + e_side side = TOTAL_2D_SIDES[0]); /** * @brief Remove a node in the fast spatial lookup. @@ -205,7 +205,7 @@ class RRSpatialLookup { int y, t_rr_type type, int ptc, - e_side side = SIDES[0]); + e_side side = TOTAL_2D_SIDES[0]); /** * @brief Mirror the last dimension of a look-up, i.e., a list of nodes, from a source coordinate to @@ -286,7 +286,7 @@ class RRSpatialLookup { int x, int y, t_rr_type type, - e_side side = SIDES[0]) const; + e_side side = TOTAL_2D_SIDES[0]) const; /* -- Internal data storage -- */ private: diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h index cfb34f754ec..d128bb44a00 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h @@ -6,6 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd * Input file: /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 65eddcc840064bbb91d7f4cf0b8bf821 */ @@ -1620,7 +1621,7 @@ constexpr const char *lookup_switch_type[] = {"UXSD_INVALID", "mux", "tristate", constexpr const char *lookup_segment_res_type[] = {"UXSD_INVALID", "GENERAL", "GCLK"}; constexpr const char *lookup_pin_type[] = {"UXSD_INVALID", "OPEN", "OUTPUT", "INPUT"}; constexpr const char *lookup_node_type[] = {"UXSD_INVALID", "CHANX", "CHANY", "SOURCE", "SINK", "OPIN", "IPIN"}; -constexpr const char *lookup_node_direction[] = {"UXSD_INVALID", "INC_DIR", "DEC_DIR", "BI_DIR"}; +constexpr const char *lookup_node_direction[] = {"UXSD_INVALID", "INC_DIR", "DEC_DIR", "BI_DIR", "NONE"}; constexpr const char *lookup_node_clk_res_type[] = 
{"UXSD_INVALID", "VIRTUAL_SINK"}; constexpr const char *lookup_loc_side[] = {"UXSD_INVALID", "LEFT", "RIGHT", "TOP", "BOTTOM", "RIGHT_LEFT", "RIGHT_BOTTOM", "RIGHT_BOTTOM_LEFT", "TOP_RIGHT", "TOP_BOTTOM", "TOP_LEFT", "TOP_RIGHT_BOTTOM", "TOP_RIGHT_LEFT", "TOP_BOTTOM_LEFT", "TOP_RIGHT_BOTTOM_LEFT", "BOTTOM_LEFT"}; @@ -1856,6 +1857,14 @@ inline enum_node_type lex_enum_node_type(const char *in, bool throw_on_invalid, inline enum_node_direction lex_enum_node_direction(const char *in, bool throw_on_invalid, const std::function * report_error){ unsigned int len = strlen(in); switch(len){ + case 4: + switch(*((triehash_uu32*)&in[0])){ + case onechar('N', 0, 32) | onechar('O', 8, 32) | onechar('N', 16, 32) | onechar('E', 24, 32): + return enum_node_direction::NONE; + break; + default: break; + } + break; case 6: switch(*((triehash_uu32*)&in[0])){ case onechar('B', 0, 32) | onechar('I', 8, 32) | onechar('_', 16, 32) | onechar('D', 24, 32): diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h index 2e42a424944..7cfea1aa19b 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h @@ -6,6 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcap.py /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd * Input file: /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 65eddcc840064bbb91d7f4cf0b8bf821 */ @@ -265,6 +266,8 @@ inline enum_node_direction conv_enum_node_direction(ucap::NodeDirection e, const return enum_node_direction::DEC_DIR; case ucap::NodeDirection::BI_DIR: return enum_node_direction::BI_DIR; + case ucap::NodeDirection::NONE: + return enum_node_direction::NONE; default: (*report_error)("Unknown enum_node_direction"); throw std::runtime_error("Unreachable!"); @@ -281,6 +284,8 @@ inline ucap::NodeDirection conv_to_enum_node_direction(enum_node_direction e) { return 
ucap::NodeDirection::DEC_DIR; case enum_node_direction::BI_DIR: return ucap::NodeDirection::BI_DIR; + case enum_node_direction::NONE: + return ucap::NodeDirection::NONE; default: throw std::runtime_error("Unknown enum_node_direction"); } diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h index ac0e16d1418..9dbac04ef7f 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h @@ -6,6 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd * Input file: /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 65eddcc840064bbb91d7f4cf0b8bf821 */ @@ -29,7 +30,7 @@ enum class enum_pin_type {UXSD_INVALID = 0, OPEN, OUTPUT, INPUT}; enum class enum_node_type {UXSD_INVALID = 0, CHANX, CHANY, SOURCE, SINK, OPIN, IPIN}; -enum class enum_node_direction {UXSD_INVALID = 0, INC_DIR, DEC_DIR, BI_DIR}; +enum class enum_node_direction {UXSD_INVALID = 0, INC_DIR, DEC_DIR, BI_DIR, NONE}; enum class enum_node_clk_res_type {UXSD_INVALID = 0, VIRTUAL_SINK}; diff --git a/libs/librrgraph/src/io/rr_graph.xsd b/libs/librrgraph/src/io/rr_graph.xsd index cea5a0f7b22..dfb63641cbb 100644 --- a/libs/librrgraph/src/io/rr_graph.xsd +++ b/libs/librrgraph/src/io/rr_graph.xsd @@ -241,6 +241,7 @@ + diff --git a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h index 35ec9171ebb..6c820aeb192 100644 --- a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h +++ b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h @@ -750,8 +750,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inode, rr_graph.node_type(node.id())); } } else { - std::bitset sides_to_add = from_uxsd_loc_side(side); - for (const e_side& side_to_add : SIDES) { + std::bitset sides_to_add = from_uxsd_loc_side(side); + for 
(const e_side& side_to_add : TOTAL_2D_SIDES) { if (sides_to_add[side_to_add]) { rr_graph_builder_->add_node_side(node_id, side_to_add); } @@ -768,8 +768,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline uxsd::enum_loc_side get_node_loc_side(const t_rr_node& node) final { const auto& rr_graph = (*rr_graph_); if (rr_graph.node_type(node.id()) == IPIN || rr_graph.node_type(node.id()) == OPIN) { - std::bitset sides_bitset; - for (const e_side& side : SIDES) { + std::bitset sides_bitset; + for (const e_side& side : TOTAL_2D_SIDES) { if (rr_graph.is_node_on_specific_side(node.id(), side)) { sides_bitset.set(side); } @@ -1849,9 +1849,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.height(), grid_.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(), grid_.height(), grid_.width(), rr_type, NUM_2D_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.width(), grid_.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(), grid_.width(), grid_.height(), rr_type, NUM_2D_SIDES); } } @@ -1864,8 +1864,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // Enum converters from/to uxsd types - std::bitset from_uxsd_loc_side(uxsd::enum_loc_side side) { - std::bitset side_mask(0x0); + std::bitset from_uxsd_loc_side(uxsd::enum_loc_side side) { + std::bitset side_mask(0x0); switch (side) { case uxsd::enum_loc_side::TOP: side_mask.set(TOP); @@ -1936,7 +1936,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return side_mask; } - uxsd::enum_loc_side to_uxsd_loc_side(std::bitset sides) { + uxsd::enum_loc_side to_uxsd_loc_side(std::bitset sides) { // Error out when // - the side has no valid bits // - the side is beyond the mapping range: 
this is to warn any changes on side truth table which may cause the mapping failed @@ -1956,6 +1956,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return Direction::DEC; case uxsd::enum_node_direction::BI_DIR: return Direction::BIDIR; + case uxsd::enum_node_direction::NONE: + return Direction::NONE; default: report_error( "Invalid node direction %d", direction); @@ -1970,6 +1972,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return uxsd::enum_node_direction::DEC_DIR; case Direction::BIDIR: return uxsd::enum_node_direction::BI_DIR; + case Direction::NONE: + return uxsd::enum_node_direction::NONE; default: report_error( "Invalid direction %d", direction); diff --git a/libs/librrgraph/src/utils/describe_rr_node.cpp b/libs/librrgraph/src/utils/describe_rr_node.cpp index a4982cce8f7..6383e3489cc 100644 --- a/libs/librrgraph/src/utils/describe_rr_node.cpp +++ b/libs/librrgraph/src/utils/describe_rr_node.cpp @@ -11,7 +11,6 @@ std::string describe_rr_node(const RRGraphView& rr_graph, bool is_flat) { std::string msg = vtr::string_fmt("RR node: %d", inode); - if (rr_graph.node_type(inode) == CHANX || rr_graph.node_type(inode) == CHANY) { auto cost_index = rr_graph.node_cost_index(inode); diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 4ea37de3f7e..d78d2dd2b3d 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -4,6 +4,7 @@ # # Cmdline: uxsdcxx/uxsdcap.py /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd # Input file: /home/mohagh18/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + # md5sum of input file: 65eddcc840064bbb91d7f4cf0b8bf821 @0xe787bf7696810419; @@ -47,6 +48,7 @@ enum NodeDirection { incDir @1; decDir @2; biDir @3; + none @4; } enum NodeClkResType { diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 47637885363..7b9c170fbe7 100644 
--- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -208,8 +208,7 @@ static void profile_source(const Netlist<>& net_list, successfully_routed = profiler.calculate_delay(RRNodeId(source_rr_node), RRNodeId(sink_rr_node), router_opts, - &delays[sink_x][sink_y], - layer_num); + &delays[sink_x][sink_y]); } if (successfully_routed) { diff --git a/vpr/src/base/SetupGrid.h b/vpr/src/base/SetupGrid.h index da9b2631c94..b65a9eb23da 100644 --- a/vpr/src/base/SetupGrid.h +++ b/vpr/src/base/SetupGrid.h @@ -12,6 +12,8 @@ #include #include "physical_types.h" +class DeviceGrid; + ///@brief Find the device satisfying the specified minimum resources /// minimum_instance_counts and target_device_utilization are not required when specifying a fixed layout DeviceGrid create_device_grid(const std::string& layout_name, diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 2e7d273c267..fd2229b8b79 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -496,6 +496,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts) RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report; RouterOpts->flat_routing = Options.flat_routing; RouterOpts->has_choking_spot = Options.has_choking_spot; + RouterOpts->custom_3d_sb_fanin_fanout = Options.custom_3d_sb_fanin_fanout; RouterOpts->with_timing_analysis = Options.timing_analysis; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index bf51e37b059..5b77f5330a7 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2495,6 +2495,13 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("false") .show_in(argparse::ShowIn::HELP_ONLY); + route_grp.add_argument(args.custom_3d_sb_fanin_fanout, "--custom_3d_sb_fanin_fanout") + .help( + "Specifies the number of tracks that can drive a 3D switch block connection " + "and the number of tracks that can 
be driven by a 3D switch block connection") + .default_value("1") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& route_timing_grp = parser.add_argument_group("timing-driven routing options"); route_timing_grp.add_argument(args.astar_fac, "--astar_fac") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index de98e9c3ca8..c9aedd2fe42 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -218,6 +218,7 @@ struct t_options { argparse::ArgValue reorder_rr_graph_nodes_seed; argparse::ArgValue flat_routing; argparse::ArgValue has_choking_spot; + argparse::ArgValue custom_3d_sb_fanin_fanout; /* Timing-driven router options only */ argparse::ArgValue astar_fac; diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 998e0c5434f..269b273ab8b 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -459,8 +459,9 @@ static void process_global_blocks(const Netlist<>& net_list, std::ifstream& fp, if (block_loc.loc.x != x || block_loc.loc.y != y) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "The placement coordinates (%d, %d) of %d block does not match given (%d, %d)", - x, y, block_loc.loc.x, block_loc.loc.y); + "The placement coordinates (%d,%d,%d) of %zu block does not match given (%d,%d,%d)", + x, y, layer_num, size_t(bnum), + block_loc.loc.x, block_loc.loc.y, block_loc.loc.layer); } auto pin_class = get_class_range_for_block(bnum, is_flat); @@ -496,9 +497,9 @@ static void format_coordinates(int& layer_num, int& x, int& y, std::string coord y = coords[1]; } else { VTR_ASSERT(coords.size() == 3); - layer_num = coords[0]; - x = coords[1]; - y = coords[2]; + x = coords[0]; + y = coords[1]; + layer_num = coords[2]; } } @@ -597,12 +598,12 @@ void print_route(const Netlist<>& net_list, int layer_num = rr_graph.node_layer(inode); fprintf(fp, "Node:\t%zu\t%6s (%d,%d,%d) ", size_t(inode), - rr_graph.node_type_string(inode), layer_num, ilow, jlow); + rr_graph.node_type_string(inode), ilow, jlow, 
layer_num); if ((ilow != rr_graph.node_xhigh(inode)) || (jlow != rr_graph.node_yhigh(inode))) - fprintf(fp, "to (%d,%d) ", rr_graph.node_xhigh(inode), - rr_graph.node_yhigh(inode)); + fprintf(fp, "to (%d,%d,%d) ", rr_graph.node_xhigh(inode), + rr_graph.node_yhigh(inode), layer_num); switch (rr_type) { case IPIN: @@ -683,11 +684,12 @@ void print_route(const Netlist<>& net_list, int iclass = get_block_pin_class_num(block_id, pin_id, is_flat); t_block_loc blk_loc; blk_loc = get_block_loc(block_id, is_flat); - fprintf(fp, "Block %s (#%zu) at (%d,%d), Pin class %d.\n", + fprintf(fp, "Block %s (#%zu) at (%d,%d,%d), Pin class %d.\n", net_list.block_name(block_id).c_str(), size_t(block_id), blk_loc.loc.x, blk_loc.loc.y, + blk_loc.loc.layer, iclass); } } diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 2bc4dd2a5f9..a8c95274552 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -15,6 +15,8 @@ #include #include +#include "cluster_util.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_math.h" #include "vtr_log.h" @@ -359,9 +361,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a fflush(stdout); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); auto& device_ctx = g_vpr_ctx.mutable_device(); - helper_ctx.lb_type_rr_graphs = vpr_setup->PackerRRGraph; device_ctx.pad_loc_type = vpr_setup->PlacerOpts.pad_loc_type; } @@ -613,12 +613,13 @@ bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { //Load a previous packing from the .net file vpr_load_packing(vpr_setup, arch); - //Load cluster_constraints data structure here since loading pack file - load_cluster_constraints(); } } + // Load cluster_constraints data structure. 
+ load_cluster_constraints(); + /* Sanity check the resulting netlist */ check_netlist(packer_opts.pack_verbosity); @@ -696,6 +697,7 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { "Must have valid .net filename to load packing"); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Ensure we have a clean start with void net remapping information */ cluster_ctx.post_routing_clb_pin_nets.clear(); @@ -706,8 +708,11 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.FileNameOpts.verify_file_digests, vpr_setup.PackerOpts.pack_verbosity); + /* Load the mapping between clusters and their atoms */ + init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist); + process_constant_nets(g_vpr_ctx.mutable_atom().nlist, - g_vpr_ctx.atom().lookup, + atom_ctx.lookup, cluster_ctx.clb_nlist, vpr_setup.constant_net_method, vpr_setup.PackerOpts.pack_verbosity); diff --git a/vpr/src/base/vpr_constraints_writer.cpp b/vpr/src/base/vpr_constraints_writer.cpp index 0de68549aec..29842028754 100644 --- a/vpr/src/base/vpr_constraints_writer.cpp +++ b/vpr/src/base/vpr_constraints_writer.cpp @@ -7,17 +7,15 @@ #include "vpr_constraints_serializer.h" #include "vpr_constraints_uxsdcxx.h" -#include "vtr_time.h" +#include "vpr_context.h" #include "globals.h" #include "pugixml.hpp" -#include "pugixml_util.hpp" -#include "clustered_netlist_utils.h" #include +#include #include "vpr_constraints_writer.h" #include "region.h" -#include "re_cluster_util.h" /** * @brief Create a partition with the given name and a single region. 
@@ -30,7 +28,6 @@ static Partition create_partition(const std::string& part_name, const Region& re void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions) { VprConstraints constraints; - if (horizontal_partitions != 0 && vertical_partitions != 0) { setup_vpr_floorplan_constraints_cutpoints(constraints, horizontal_partitions, vertical_partitions); } else { @@ -83,8 +80,7 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex part.set_part_region(pr); constraints.mutable_place_constraints().add_partition(part); - const std::unordered_set& atoms = cluster_to_atoms(blk_id); - + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; for (AtomBlockId atom_id : atoms) { constraints.mutable_place_constraints().add_constrained_atom(atom_id, partid); } @@ -92,7 +88,9 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex } } -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints) { +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& block_locs = g_vpr_ctx.placement().block_locs(); auto& device_ctx = g_vpr_ctx.device(); @@ -158,7 +156,7 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int * appropriate region accordingly */ for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(blk_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[blk_id]; int x = block_locs[blk_id].loc.x; int y = block_locs[blk_id].loc.y; int width = device_ctx.grid.width(); diff --git a/vpr/src/base/vpr_constraints_writer.h b/vpr/src/base/vpr_constraints_writer.h index 25dd7fc08ce..ddfcd259b43 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ 
b/vpr/src/base/vpr_constraints_writer.h @@ -25,6 +25,8 @@ #ifndef VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ #define VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ +class VprConstraints; + /** * @brief Write out floorplan constraints to an XML file based on current placement * @@ -35,7 +37,11 @@ * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions); +void write_vpr_floorplan_constraints(const char* file_name, + int expand, + bool subtile, + int horizontal_partitions, + int vertical_partitions); /** * @brief Populates VprConstraints by creating a partition for each clustered block. @@ -50,7 +56,9 @@ void write_vpr_floorplan_constraints(const char* file_name, int expand, bool sub * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile); +void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, + int expand, + bool subtile); /** * @brief Populates VprConstraints by dividing the grid into multiple partitions. @@ -62,6 +70,8 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex * @param horizontal_cutpoints The number of horizontal cut-lines. * @param vertical_cutpoints The number of vertical cut_lines. 
*/ -void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, + int horizontal_cutpoints, + int vertical_cutpoints); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7ff7205024a..f69c58e94ab 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -10,6 +10,7 @@ #include "vtr_ndmatrix.h" #include "vtr_optional.h" #include "vtr_vector.h" +#include "vtr_vector_map.h" #include "atom_netlist.h" #include "clustered_netlist.h" #include "rr_graph_view.h" @@ -78,12 +79,6 @@ struct AtomContext : public Context { /// @brief Mappings to/from the Atom Netlist to physically described .blif models AtomLookup lookup; - - /// @brief Prepacker object which performs prepacking and stores the pack - /// molecules. Has a method to get the pack molecule of an AtomBlock. - /// TODO: This is mainly only used in the clusterer. It can probably be - /// removed from the AtomContext entirely. - Prepacker prepacker; }; /** @@ -286,69 +281,23 @@ struct ClusteringContext : public Context { * CLB Netlist ********************************************************************/ - ///@brief New netlist class derived from Netlist + /// @brief New netlist class derived from Netlist ClusteredNetlist clb_nlist; - /* Database for nets of each clb block pin after routing stage - * - post_routing_clb_pin_nets: - * mapping of pb_type pins to clustered net ids - * - pre_routing_net_pin_mapping: - * a copy of mapping for current pb_route index to previous pb_route index - * Record the previous pin mapping for finding the correct pin index during timing analysis - */ + /// @brief Database for nets of each clb block pin after routing stage. + /// - post_routing_clb_pin_nets: + /// mapping of pb_type pins to clustered net ids. 
+ /// - pre_routing_net_pin_mapping: + /// a copy of mapping for current pb_route index to previous pb_route index + /// Record the previous pin mapping for finding the correct pin index during + /// timing analysis. std::map> post_routing_clb_pin_nets; std::map> pre_routing_net_pin_mapping; -}; - -/** - * @brief State relating to helper data structure using in the clustering stage - * - * This should contain helper data structures that are useful in the clustering/packing stage. - * They are encapsulated here as they are useful in clustering and reclustering algorithms that may be used - * in packing or placement stages. - */ -struct ClusteringHelperContext : public Context { - // A map used to save the number of used instances from each logical block type. - std::map num_used_type_instances; - - // Stats keeper for placement information during packing/clustering - t_cluster_placement_stats* cluster_placement_stats; - - // total number of models in the architecture - int num_models; - - int max_cluster_size; - t_pb_graph_node** primitives_list; - bool enable_pin_feasibility_filter; - int feasible_block_array_size; - - // total number of CLBs - int total_clb_num; - - // A vector of routing resource nodes within each of logic cluster_ctx.blocks types [0 .. num_logical_block_type-1] - std::vector* lb_type_rr_graphs; - - // the utilization of external input/output pins during packing (between 0 and 1) - t_ext_pin_util_targets target_external_pin_util; - - // During clustering, a block is related to un-clustered primitives with nets. - // This relation has three types: low fanout, high fanout, and transitive - // high_fanout_thresholds stores the threshold for nets to a block type to be considered high fanout - t_pack_high_fanout_thresholds high_fanout_thresholds; - - // A vector of unordered_sets of AtomBlockIds that are inside each clustered block [0 .. 
num_clustered_blocks-1] - // unordered_set for faster insertion/deletion during the iterative improvement process of packing + /// @brief A vector of unordered_sets of AtomBlockIds that are inside each + /// clustered block [0 .. num_clustered_blocks-1] + /// This is populated when the packing is loaded. vtr::vector> atoms_lookup; - - /** Stores the NoC group ID of each atom block. Atom blocks that belong - * to different NoC groups can't be clustered with each other into the - * same clustered block.*/ - vtr::vector atom_noc_grp_id; - - ~ClusteringHelperContext() { - delete[] primitives_list; - } }; /** @@ -728,9 +677,6 @@ class VprContext : public Context { const ClusteringContext& clustering() const { return clustering_; } ClusteringContext& mutable_clustering() { return clustering_; } - const ClusteringHelperContext& cl_helper() const { return helper_; } - ClusteringHelperContext& mutable_cl_helper() { return helper_; } - const PlacementContext& placement() const { return placement_; } PlacementContext& mutable_placement() { return placement_; } @@ -760,8 +706,6 @@ class VprContext : public Context { PowerContext power_; ClusteringContext clustering_; - ClusteringHelperContext helper_; - PlacementContext placement_; RoutingContext routing_; FloorplanningContext constraints_; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index e169a9e82a5..b63a2f7d501 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -168,15 +168,6 @@ enum class e_cluster_seed { BLEND2 }; -enum class e_block_pack_status { - BLK_PASSED, - BLK_FAILED_FEASIBLE, - BLK_FAILED_ROUTE, - BLK_FAILED_FLOORPLANNING, - BLK_FAILED_NOC_GROUP, - BLK_STATUS_UNDEFINED -}; - struct t_ext_pin_util { t_ext_pin_util() = default; t_ext_pin_util(float in, float out) @@ -1412,6 +1403,8 @@ struct t_router_opts { bool flat_routing; bool has_choking_spot; + int custom_3d_sb_fanin_fanout = 1; + bool with_timing_analysis; // Options related to rr_node reordering, for testing and 
possible cache optimization diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index f2b573f69e7..02645f6baf5 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -563,7 +563,7 @@ void draw_rr_pin(RRNodeId inode, const ezgl::color& color, ezgl::renderer* g) { /* As nodes may appear on more than one side, walk through the possible nodes * - draw the pin on each side that it appears */ - for (const e_side& pin_side : SIDES) { + for (const e_side& pin_side : TOTAL_2D_SIDES) { if (!rr_graph.is_node_on_specific_side(RRNodeId(inode), pin_side)) { continue; } @@ -719,7 +719,7 @@ RRNodeId draw_check_rr_node_hit(float click_x, float click_y) { int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_pin_num(inode); float xcen, ycen; - for (const e_side& iside : SIDES) { + for (const e_side& iside : TOTAL_2D_SIDES) { // If pin exists on this side of the block, then get pin coordinates if (type->pinloc[width_offset][height_offset][size_t(iside)][ipin]) { draw_get_rr_pin_coords(inode, &xcen, &ycen, iside); @@ -926,7 +926,8 @@ void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, con default: vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__, - "in draw_get_rr_pin_coords: Unexpected side.\n"); + "in draw_get_rr_pin_coords: Unexpected side %s.\n", + TOTAL_2D_SIDE_STRINGS[pin_side]); break; } diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index d8aee31c89b..274e02e0fb3 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -103,7 +103,7 @@ void draw_chany_to_chany_edge(RRNodeId from_node, RRNodeId to_node, short switch if (rr_graph.node_direction(to_node) == Direction::INC) { /* INC wire starts at bottom edge */ y2 = to_chan.bottom(); - /* since no U-turns from_track must be INC as well */ + /* since no U-turns from_tracks must be INC as well */ y1 = draw_coords->tile_y[to_ylow - 1] + draw_coords->get_tile_width(); } else { /* 
DEC wire starts at top edge */ @@ -194,7 +194,7 @@ void draw_chanx_to_chanx_edge(RRNodeId from_node, RRNodeId to_node, short switch if (rr_graph.node_direction(to_node) == Direction::INC) { /* INC wire starts at leftmost edge */ VTR_ASSERT(from_xlow < to_xlow); x2 = to_chan.left(); - /* since no U-turns from_track must be INC as well */ + /* since no U-turns from_tracks must be INC as well */ x1 = draw_coords->tile_x[to_xlow - 1] + draw_coords->get_tile_width(); } else { /* DEC wire starts at rightmost edge */ @@ -324,7 +324,7 @@ void draw_pin_to_pin(RRNodeId opin_node, RRNodeId ipin_node, ezgl::renderer* g) */ float x1 = 0, y1 = 0; std::vector opin_candidate_sides; - for (const e_side& opin_candidate_side : SIDES) { + for (const e_side& opin_candidate_side : TOTAL_2D_SIDES) { if (rr_graph.is_node_on_specific_side(opin_node, opin_candidate_side)) { opin_candidate_sides.push_back(opin_candidate_side); } @@ -334,7 +334,7 @@ void draw_pin_to_pin(RRNodeId opin_node, RRNodeId ipin_node, ezgl::renderer* g) float x2 = 0, y2 = 0; std::vector ipin_candidate_sides; - for (const e_side& ipin_candidate_side : SIDES) { + for (const e_side& ipin_candidate_side : TOTAL_2D_SIDES) { if (rr_graph.is_node_on_specific_side(ipin_node, ipin_candidate_side)) { ipin_candidate_sides.push_back(ipin_candidate_side); } @@ -355,7 +355,7 @@ void draw_pin_to_sink(RRNodeId ipin_node, RRNodeId sink_node, ezgl::renderer* g) float x1 = 0, y1 = 0; /* Draw the line for each ipin on different sides */ - for (const e_side& pin_side : SIDES) { + for (const e_side& pin_side : TOTAL_2D_SIDES) { if (!rr_graph.is_node_on_specific_side(ipin_node, pin_side)) { continue; } @@ -381,7 +381,7 @@ void draw_source_to_pin(RRNodeId source_node, RRNodeId opin_node, ezgl::renderer draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(source_node)], &x1, &y1); /* Draw the line for each ipin on different sides */ - for (const e_side& pin_side : SIDES) { + for (const e_side& pin_side : TOTAL_2D_SIDES) { if 
(!rr_graph.is_node_on_specific_side(opin_node, pin_side)) { continue; } @@ -455,7 +455,7 @@ void draw_pin_to_chan_edge(RRNodeId pin_node, RRNodeId chan_node, ezgl::renderer * the actual offset of the pin in the context of grid width and height */ std::vector pin_candidate_sides; - for (const e_side& pin_candidate_side : SIDES) { + for (const e_side& pin_candidate_side : TOTAL_2D_SIDES) { if ((rr_graph.is_node_on_specific_side(pin_node, pin_candidate_side)) && (grid_type->pinloc[width_offset][height_offset][pin_candidate_side][rr_graph.node_pin_num(pin_node)])) { pin_candidate_sides.push_back(pin_candidate_side); @@ -464,7 +464,7 @@ void draw_pin_to_chan_edge(RRNodeId pin_node, RRNodeId chan_node, ezgl::renderer /* Only 1 side will be picked in the end * Any rr_node of a grid should have at least 1 side!!! */ - e_side pin_side = NUM_SIDES; + e_side pin_side = NUM_2D_SIDES; const t_rr_type channel_type = rr_graph.node_type(chan_node); if (1 == pin_candidate_sides.size()) { pin_side = pin_candidate_sides[0]; @@ -483,7 +483,7 @@ void draw_pin_to_chan_edge(RRNodeId pin_node, RRNodeId chan_node, ezgl::renderer VTR_ASSERT(pin_candidate_sides.end() != std::find(pin_candidate_sides.begin(), pin_candidate_sides.end(), pin_side)); } /* Sanity check */ - VTR_ASSERT(NUM_SIDES != pin_side); + VTR_ASSERT(NUM_2D_SIDES != pin_side); /* Now we determine which side to be used, calculate the offset for the pin to be drawn * - For the pin locates above/right to the grid (at the top/right side), diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 2842577b2fa..756dfd9d976 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -251,7 +251,7 @@ void auto_zoom_rr_node(RRNodeId rr_node_id) { int ipin = rr_graph.node_ptc_num(rr_node_id); float xcen, ycen; - for (const e_side& iside : SIDES) { + for (const e_side& iside : TOTAL_2D_SIDES) { if (type->pinloc[width_offset][height_offset][size_t(iside)][ipin]) { draw_get_rr_pin_coords(rr_node_id, 
&xcen, &ycen, iside); rr_node = {{xcen - draw_coords->pin_size, ycen - draw_coords->pin_size}, diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index a5ee38b8d0c..607e4b530f3 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -44,11 +44,10 @@ #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" #include "globals.h" -#include "pack_types.h" #include "prepack.h" #include "timing_info.h" #include "vpr_types.h" @@ -70,13 +69,14 @@ std::map do_clustering(const t_packer_opts& pa const t_analysis_opts& analysis_opts, const t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data) { /* Does the actual work of clustering multiple netlist blocks * * into clusters. 
*/ @@ -102,7 +102,7 @@ std::map do_clustering(const t_packer_opts& pa t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, - int num_blocks_hill_added, max_pb_depth, detailed_routing_stage; + int num_blocks_hill_added; const int verbosity = packer_opts.pack_verbosity; @@ -116,17 +116,11 @@ std::map do_clustering(const t_packer_opts& pa enum e_block_pack_status block_pack_status; - t_cluster_placement_stats* cur_cluster_placement_stats_ptr; - t_lb_router_data* router_data = nullptr; + t_cluster_placement_stats* cur_cluster_placement_stats_ptr = nullptr; t_pack_molecule *istart, *next_molecule, *prev_molecule; auto& atom_ctx = g_vpr_ctx.atom(); auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - helper_ctx.enable_pin_feasibility_filter = packer_opts.enable_pin_feasibility_filter; - helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; std::shared_ptr clustering_delay_calc; std::shared_ptr timing_info; @@ -141,30 +135,25 @@ std::map do_clustering(const t_packer_opts& pa // Index 2 holds the number of LEs that are used for registers only. std::vector le_count(3, 0); - helper_ctx.total_clb_num = 0; + int total_clb_num = 0; /* TODO: This is memory inefficient, fix if causes problems */ /* Store stats on nets used by packed block, useful for determining transitively connected blocks * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] 
and C have a weak link) */ - vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); + vtr::vector> clb_inter_blk_nets(atom_ctx.nlist.blocks().size()); istart = nullptr; - /* determine bound on cluster size and primitive input size */ - helper_ctx.max_cluster_size = 0; - max_pb_depth = 0; - const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_ctx.nlist); prepacker.mark_all_molecules_valid(); cluster_stats.num_molecules = prepacker.get_num_molecules(); - get_max_cluster_size_and_pb_depth(helper_ctx.max_cluster_size, max_pb_depth); - if (packer_opts.hill_climbing_flag) { - clustering_data.hill_climbing_inputs_avail = new int[helper_ctx.max_cluster_size + 1]; - for (int i = 0; i < helper_ctx.max_cluster_size + 1; i++) + size_t max_cluster_size = cluster_legalizer.get_max_cluster_size(); + clustering_data.hill_climbing_inputs_avail = new int[max_cluster_size + 1]; + for (size_t i = 0; i < max_cluster_size + 1; i++) clustering_data.hill_climbing_inputs_avail[i] = 0; } else { clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */ @@ -173,8 +162,9 @@ std::map do_clustering(const t_packer_opts& pa #if 0 check_for_duplicate_inputs (); #endif + alloc_and_init_clustering(max_molecule_stats, - &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), prepacker, + prepacker, clustering_data, net_output_feeds_driving_block_input, unclustered_list_head_size, cluster_stats.num_molecules); @@ -187,9 +177,6 @@ std::map do_clustering(const t_packer_opts& pa cluster_stats.blocks_since_last_analysis = 0; num_blocks_hill_added = 0; - VTR_ASSERT(helper_ctx.max_cluster_size < MAX_SHORT); - /* Limit maximum number of elements for each cluster */ - //Default criticalities set to zero (e.g. if not timing driven) vtr::vector atom_criticality(atom_ctx.nlist.blocks().size(), 0.); @@ -199,11 +186,17 @@ std::map do_clustering(const t_packer_opts& pa } // Assign gain scores to atoms and sort them based on the scores. 
- auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); + auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, + max_molecule_stats, + prepacker, + atom_criticality); /* index of next most timing critical block */ int seed_index = 0; - istart = get_highest_gain_seed_molecule(seed_index, seed_atoms); + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); print_pack_status_header(); @@ -214,61 +207,58 @@ std::map do_clustering(const t_packer_opts& pa while (istart != nullptr) { bool is_cluster_legal = false; int saved_seed_index = seed_index; - for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - // Use the total number created clusters so far as the ID for the new cluster - ClusterBlockId clb_index(helper_ctx.total_clb_num); - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); - - /*Used to store cluster's PartitionRegion as primitives are added to it. - * Since some of the primitives might fail legality, this structure temporarily - * stores PartitionRegion information while the cluster is packed*/ - PartitionRegion temp_cluster_pr; - /* - * Stores the cluster's NoC group ID as more primitives are added to it. - * This is used to check if a candidate primitive is in the same NoC group - * as the atom blocks that have already been added to the primitive. - */ - NocGroupId temp_cluster_noc_grp_id = NocGroupId::INVALID(); - - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, - clb_index, istart, + // The basic algorithm: + // 1) Try to put all the molecules in that you can without doing the + // full intra-lb route. Then do full legalization at the end. 
+ // 2) If the legalization at the end fails, try again, but this time + // do full legalization for each molecule added to the cluster. + const ClusterLegalizationStrategy legalization_strategies[] = {ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + ClusterLegalizationStrategy::FULL}; + for (const ClusterLegalizationStrategy strategy : legalization_strategies) { + // If the cluster is legal, no need to try a stronger cluster legalizer + // mode. + if (is_cluster_legal) + break; + // Set the legalization strategy of the cluster legalizer. + cluster_legalizer.set_legalization_strategy(strategy); + + LegalizationClusterId legalization_cluster_id; + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", total_clb_num); + + start_new_cluster(cluster_legalizer, + legalization_cluster_id, + istart, num_used_type_instances, packer_opts.target_device_utilization, - helper_ctx.num_models, helper_ctx.max_cluster_size, arch, packer_opts.device_layout, - lb_type_rr_graphs, &router_data, - detailed_routing_stage, &cluster_ctx.clb_nlist, primitive_candidate_block_types, verbosity, - packer_opts.enable_pin_feasibility_filter, - balance_block_type_utilization, - packer_opts.feasible_block_array_size, - temp_cluster_pr, - temp_cluster_noc_grp_id); + balance_block_type_utilization); //initial molecule in cluster has been processed cluster_stats.num_molecules_processed++; cluster_stats.mols_since_last_print++; - print_pack_status(helper_ctx.total_clb_num, + print_pack_status(total_clb_num, cluster_stats.num_molecules, cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - attraction_groups); + attraction_groups, + cluster_legalizer); VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - cluster_ctx.clb_nlist.block_type(clb_index)->name); + "Complex block %d: '%s' (%s) ", total_clb_num, + 
cluster_legalizer.get_cluster_pb(legalization_cluster_id)->name, + cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); VTR_LOGV(verbosity > 2, "."); //Progress dot for seed-block fflush(stdout); - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); - int high_fanout_threshold = helper_ctx.high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); - update_cluster_stats(istart, clb_index, + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); + update_cluster_stats(istart, + cluster_legalizer, is_clock, //Set of clock nets is_global, //Set of global nets (currently all clocks) packer_opts.global_clocks, @@ -278,16 +268,16 @@ std::map do_clustering(const t_packer_opts& pa *timing_info, attraction_groups, net_output_feeds_driving_block_input); - helper_ctx.total_clb_num++; + total_clb_num++; if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cur_cluster_placement_stats_ptr = cluster_legalizer.get_cluster_placement_stats(legalization_cluster_id); cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -295,8 +285,10 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + 
cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, verbosity, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -322,18 +314,16 @@ std::map do_clustering(const t_packer_opts& pa while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { prev_molecule = next_molecule; - try_fill_cluster(packer_opts, + try_fill_cluster(cluster_legalizer, + prepacker, + packer_opts, cur_cluster_placement_stats_ptr, prev_molecule, next_molecule, num_repeated_molecules, - helper_ctx.primitives_list, cluster_stats, - helper_ctx.total_clb_num, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, + total_clb_num, + legalization_cluster_id, attraction_groups, clb_inter_blk_nets, allow_unrelated_clustering, @@ -341,10 +331,6 @@ std::map do_clustering(const t_packer_opts& pa is_clock, is_global, timing_info, - router_data, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, block_pack_status, clustering_data.unclustered_list_head, unclustered_list_head_size, @@ -352,16 +338,41 @@ std::map do_clustering(const t_packer_opts& pa primitive_candidate_block_types); } - is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + if (strategy == ClusterLegalizationStrategy::FULL) { + // If the legalizer fully legalized for every molecule added, + // the cluster should be legal. + is_cluster_legal = true; + } else { + // If the legalizer did not check everything for every molecule, + // need to check that the full cluster is legal (need to perform + // intra-lb routing). 
+ is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id); + } if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seed_index, cluster_stats, router_data); - store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); + // Pick new seed. + istart = get_highest_gain_seed_molecule(seed_index, + seed_atoms, + prepacker, + cluster_legalizer); + // Update cluster stats. + if (packer_opts.timing_driven && num_blocks_hill_added > 0) + cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; + + store_cluster_info_and_free(packer_opts, legalization_cluster_id, logic_block_type, le_pb_type, le_count, cluster_legalizer, clb_inter_blk_nets); + // Since the cluster will no longer be added to beyond this point, + // clean the cluster of any data not strictly necessary for + // creating the clustered netlist. + cluster_legalizer.clean_cluster(legalization_cluster_id); } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, saved_seed_index, num_used_type_instances, helper_ctx.total_clb_num, seed_index); + // If the cluster is not legal, requeue used mols. + num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--; + total_clb_num--; + seed_index = saved_seed_index; + // Destroy the illegal cluster. + cluster_legalizer.destroy_cluster(legalization_cluster_id); + cluster_legalizer.compress(); } - free_router_data(router_data); - router_data = nullptr; } } @@ -371,7 +382,12 @@ std::map do_clustering(const t_packer_opts& pa } //check_floorplan_regions(floorplan_regions_overfull); - floorplan_regions_overfull = floorplan_constraints_regions_overfull(); + floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer); + + // Ensure that we have kept track of the number of clusters correctly. 
+ // TODO: The total_clb_num variable could probably just be replaced by + // clusters().size(). + VTR_ASSERT(cluster_legalizer.clusters().size() == (size_t)total_clb_num); return num_used_type_instances; } diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index 76b2315ceae..a10d7ccf21a 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -1,32 +1,32 @@ #ifndef CLUSTER_H #define CLUSTER_H -#include #include -#include +#include #include "physical_types.h" #include "vpr_types.h" -#include "atom_netlist_fwd.h" -#include "attraction_groups.h" -#include "cluster_util.h" +class AtomNetid; +class AttractionInfo; +class ClusterLegalizer; +class ClusteredNetlist; class Prepacker; +struct t_clustering_data; std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, Prepacker& prepacker, + ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, bool allow_unrelated_clustering, bool balance_block_type_utilization, - std::vector* lb_type_rr_graphs, AttractionInfo& attraction_groups, bool& floorplan_regions_overfull, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, t_clustering_data& clustering_data); -int get_cluster_of_block(int blkidx); - void print_pb_type_count(const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp new file mode 100644 index 00000000000..51d79744672 --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -0,0 +1,1786 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The implementation of the Cluster Legalizer class. + * + * Most of the code in this file was originally part of cluster_util.cpp and was + * highly integrated with the clusterer in VPR. All code that was used for + * legalizing the clusters was moved into this file and all the functionality + * was moved into the ClusterLegalizer class. 
+ */ + +#include "cluster_legalizer.h" +#include +#include +#include +#include +#include "atom_lookup.h" +#include "atom_netlist.h" +#include "cluster_placement.h" +#include "cluster_router.h" +#include "cluster_util.h" +#include "globals.h" +#include "logic_types.h" +#include "netlist_utils.h" +#include "noc_aware_cluster_util.h" +#include "noc_data_types.h" +#include "pack_types.h" +#include "partition.h" +#include "partition_region.h" +#include "physical_types.h" +#include "prepack.h" +#include "user_place_constraints.h" +#include "vpr_context.h" +#include "vpr_types.h" +#include "vpr_utils.h" +#include "vtr_assert.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +/* + * @brief Gets the max cluster size that any logical block can have. + * + * This is the maximum number of primitives any cluster can contain. + */ +static size_t calc_max_cluster_size(const std::vector& logical_block_types) { + size_t max_cluster_size = 0; + for (const t_logical_block_type& blk_type : logical_block_types) { + if (is_empty_type(&blk_type)) + continue; + int cur_cluster_size = get_max_primitives_in_pb_type(blk_type.pb_type); + max_cluster_size = std::max(max_cluster_size, cur_cluster_size); + } + return max_cluster_size; +} + +/* + * @brief Allocates the stats stored within the pb of a cluster. + * + * Used to store information used during clustering. + */ +static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { + /* Call this routine when starting to fill up a new cluster. It resets * + * the gain vector, etc. 
*/ + + pb->pb_stats = new t_pb_stats; + + pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->num_feasible_blocks = NOT_VALID; + pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; + + for (int i = 0; i < feasible_block_array_size; i++) + pb->pb_stats->feasible_blocks[i] = nullptr; + + pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); + + pb->pb_stats->pulled_from_atom_groups = 0; + pb->pb_stats->num_att_group_atoms_used = 0; + + pb->pb_stats->gain.clear(); + pb->pb_stats->timinggain.clear(); + pb->pb_stats->connectiongain.clear(); + pb->pb_stats->sharinggain.clear(); + pb->pb_stats->hillgain.clear(); + pb->pb_stats->transitive_fanout_candidates.clear(); + + pb->pb_stats->num_pins_of_net_in_pb.clear(); + + pb->pb_stats->num_child_blocks_in_pb = 0; + + pb->pb_stats->explore_transitive_fanout = true; +} + +/* + * @brief Check the atom blocks of a cluster pb. Used in the verify method. 
+ */ +/* TODO: May want to check that all atom blocks are actually reached */ +static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* pb_type = pb->pb_graph_node->pb_type; + if (pb_type->num_modes == 0) { + /* primitive */ + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(pb); + if (blk_id) { + if (blocks_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block is already contained in another pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + blocks_checked.insert(blk_id); + if (pb != atom_ctx.lookup.atom_pb(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s contains atom block %s but atom block does not link to pb.\n", + pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); + } + } + } else { + /* this is a container pb, all container pbs must contain children */ + bool has_child = false; + for (int i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs[i] != nullptr) { + if (pb->child_pbs[i][j].name != nullptr) { + has_child = true; + check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); + } + } + } + } + VTR_ASSERT(has_child); + } +} + +/// @brief Recursively frees the pb stats of the given pb, without freeing the +/// pb itself. +static void free_pb_stats_recursive(t_pb* pb) { + /* Releases all the memory used by clustering data structures. 
*/ + if (pb) { + if (pb->pb_graph_node != nullptr) { + if (!pb->pb_graph_node->is_primitive()) { + for (int i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (int j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs && pb->child_pbs[i]) { + free_pb_stats_recursive(&pb->child_pbs[i][j]); + } + } + } + } + } + free_pb_stats(pb); + } +} + +/* Record the failure of the molecule in this cluster in the current pb stats. + * If a molecule fails repeatedly, it's gain will be penalized if packing with + * attraction groups on. */ +static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { + //Only have to record the failure for the first atom in the molecule. + //The convention when checking if a molecule has failed to pack in the cluster + //is to check whether the first atoms has been recorded as having failed + + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + +/** + * @brief Checks whether an atom block can be added to a clustered block + * without violating floorplanning constraints. It also updates the + * clustered block's floorplanning region by taking the intersection of + * its current region and the floorplanning region of the given atom block. + * + * @param atom_blk_id A unique ID for the candidate atom block to + * be added to the growing cluster. + * @param cluster_pr The floorplanning regions of the clustered + * block. This function may update the given + * region. + * @param constraints The set of user-given place constraints. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * @param cluster_pr_needs_update Indicates whether the floorplanning region + * of the clustered block have updated. 
+ * + * @return True if adding the given atom block to the clustered block does not + * violated any floorplanning constraints. + */ +static bool check_cluster_floorplanning(AtomBlockId atom_blk_id, + PartitionRegion& cluster_pr, + const UserPlaceConstraints& constraints, + int log_verbosity, + bool& cluster_pr_needs_update) { + // Get the partition ID of the atom. + PartitionId part_id = constraints.get_atom_partition(atom_blk_id); + // If the partition ID is invalid, then it can be put in the cluster + // regardless of what the cluster's PartitionRegion is since it is not + // constrained. + if (!part_id.is_valid()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has no floorplanning constraints\n", + atom_blk_id); + cluster_pr_needs_update = false; + return true; + } + + // Get the Atom and Cluster Partition Regions + const PartitionRegion& atom_pr = constraints.get_partition_pr(part_id); + + // If the Cluster's PartitionRegion is empty, then this atom's PR becomes + // the Cluster's new PartitionRegion. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d has floorplanning constraints\n", + atom_blk_id); + cluster_pr = atom_pr; + cluster_pr_needs_update = true; + return true; + } + + // The Cluster's new PartitionRegion is the intersection of the Cluster's + // original PartitionRegion and the atom's PartitionRegion. + update_cluster_part_reg(cluster_pr, atom_pr); + + // If the intersection is empty, then the atom cannot be placed in this + // Cluster due to floorplanning constraints. + if (cluster_pr.empty()) { + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster\n", + atom_blk_id); + cluster_pr_needs_update = false; + return false; + } + + // If the Cluster's new PartitionRegion is non-empty, then this atom passes + // the floorplanning constraints and the cluster's PartitionRegion should be + // updated. 
+ cluster_pr_needs_update = true; + VTR_LOGV(log_verbosity > 3, + "\t\t\t Intersect: Atom block %d passed cluster, cluster PR was updated with intersection result \n", + atom_blk_id); + return true; +} + +/** + * @brief Checks if an atom block can be added to a clustered block without + * violating NoC group constraints. For passing this check, either both + * clustered and atom blocks must belong to the same NoC group, or at + * least one of them should not belong to any NoC group. If the atom block + * is associated with a NoC group while the clustered block does not + * belong to any NoC groups, the NoC group ID of the atom block is assigned + * to the clustered block when the atom is added to it. + * + * @param atom_blk_id A unique ID for the candidate atom block to be + * added to the growing cluster. + * @param cluster_noc_grp_id The NoC group ID of the clustered block. This + * function may update this ID. + * @param atom_noc_grp_ids A mapping from atoms to NoC group IDs. + * @param log_verbosity Controls the detail level of log information + * printed by this function. + * + * @return True if adding the atom block the cluster does not violate NoC group + * constraints. + */ +static bool check_cluster_noc_group(AtomBlockId atom_blk_id, + NocGroupId& cluster_noc_grp_id, + const vtr::vector& atom_noc_grp_ids, + int log_verbosity) { + const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[atom_blk_id]; + + if (!cluster_noc_grp_id.is_valid()) { + // If the cluster does not have a NoC group, assign the atom's NoC group + // to the cluster. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was updated with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + cluster_noc_grp_id = atom_noc_grp_id; + return true; + } + + if (cluster_noc_grp_id == atom_noc_grp_id) { + // If the cluster has the same NoC group ID as the atom, they are + // compatible. 
+ VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d passed cluster, cluster's NoC group was compatible with the atom's group %d\n", + atom_blk_id, (size_t)atom_noc_grp_id); + return true; + } + + // If the cluster belongs to a different NoC group than the atom's group, + // they are incompatible. + VTR_LOGV(log_verbosity > 3, + "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster. Cluster's NoC group: %d, atom's NoC group: %d\n", + atom_blk_id, (size_t)cluster_noc_grp_id, (size_t)atom_noc_grp_id); + return false; +} + +/** + * @brief This function takes the root block of a chain molecule and a proposed + * placement primitive for this block. The function then checks if this + * chain root block has a placement constraint (such as being driven from + * outside the cluster) and returns the status of the placement accordingly. + */ +static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, + const t_pack_molecule* molecule, + const AtomBlockId blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + bool is_long_chain = molecule->chain_info->is_long_chain; + + const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; + + t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; + AtomNetId chain_net_id; + auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); + + if (port_id) { + chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); + } + + // if this block is part of a long chain or it is driven by a cluster + // input pin we need to check the placement legality of this block + // Depending on the logic synthesis even small chains that can fit within one + // cluster might need to start at the top of the cluster as their input can be + // driven by a global gnd or vdd. 
Therefore even if this is not a long chain + // but its input pin is driven by a net, the placement legality is checked. + if (is_long_chain || chain_net_id) { + auto chain_id = molecule->chain_info->chain_id; + // if this chain has a chain id assigned to it (implies is_long_chain too) + if (chain_id != -1) { + // the chosen primitive should be a valid starting point for the chain + // long chains should only be placed at the top of the chain tieOff = 0 + if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + // the chain doesn't have an assigned chain_id yet + } else { + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + for (const auto& chain : chain_root_pins) { + for (auto tieOff : chain) { + // check if this chosen primitive is one of the possible + // starting points for this chain. + if (pb_graph_node == tieOff->parent_node) { + // this location matches with the one of the dedicated chain + // input from outside logic block, therefore it is feasible + block_pack_status = e_block_pack_status::BLK_PASSED; + break; + } + // long chains should only be placed at the top of the chain tieOff = 0 + if (is_long_chain) break; + } + } + } + } + + return block_pack_status; +} + +/* + * @brief Check that the two atom blocks blk_id and sibling_blk_id (which should + * both be memory slices) are feasible, in the sense that they have + * precicely the same net connections (with the exception of nets in data + * port classes). + * + * Note that this routine does not check pin feasibility against the cur_pb_type; so + * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. 
+ */ +static bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); + + //First, identify the 'data' ports by looking at the cur_pb_type + std::unordered_set data_ports; + for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { + const char* port_class = cur_pb_type->ports[iport].port_class; + if (port_class && strstr(port_class, "data") == port_class) { + //The port_class starts with "data", so it is a data port + + //Record the port + data_ports.insert(cur_pb_type->ports[iport].model_port); + } + } + + //Now verify that all nets (except those connected to data ports) are equivalent + //between blk_id and sibling_blk_id + + //Since the atom netlist stores only in-use ports, we iterate over the model to ensure + //all ports are compared + const t_model* model = cur_pb_type->model; + for (t_model_ports* port : {model->inputs, model->outputs}) { + for (; port; port = port->next) { + if (data_ports.count(port)) { + //Don't check data ports + continue; + } + + //Note: VPR doesn't support multi-driven nets, so all outputs + //should be data ports, otherwise the siblings will both be + //driving the output net + + //Get the ports from each primitive + auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); + auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); + + //Check that all nets (including unconnected nets) match + for (int ipin = 0; ipin < port->size; ++ipin) { + //The nets are initialized as invalid (i.e. 
disconnected) + AtomNetId blk_net_id; + AtomNetId sib_net_id; + + //We can get the actual net provided the port exists + // + //Note that if the port did not exist, the net is left + //as invalid/disconneced + if (blk_port_id) { + blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); + } + if (sib_port_id) { + sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); + } + + //The sibling and block must have the same (possibly disconnected) + //net on this pin + if (blk_net_id != sib_net_id) { + //Nets do not match, not feasible + return false; + } + } + } + } + + return true; +} + +/* + * @brief Check if the given atom is feasible in the given pb. + */ +static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; + + VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ + + AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { + /* This pb already has a different logical block */ + return false; + } + + if (cur_pb_type->class_type == MEMORY_CLASS) { + /* Memory class has additional feasibility requirements: + * - all siblings must share all nets, including open nets, with the exception of data nets */ + + /* find sibling if one exists */ + AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); + + if (sibling_memory_blk_id) { + //There is a sibling, see if the current block is feasible with it + bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); + if (!sibling_feasible) { + return false; + } + } + } + + //Generic feasibility check + return primitive_type_feasible(blk_id, cur_pb_type); +} + +/** + * @brief Try to place atom block into current primitive location + */ +static enum e_block_pack_status +try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, + const AtomBlockId blk_id, + t_pb* cb, + t_pb** parent, 
+ const int max_models, + const int max_cluster_size, + const LegalizationClusterId cluster_id, + vtr::vector_map& atom_cluster, + const t_cluster_placement_stats* cluster_placement_stats_ptr, + const t_pack_molecule* molecule, + t_lb_router_data* router_data, + int verbosity, + const int feasible_block_array_size) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + VTR_ASSERT_SAFE(cb != nullptr); + e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; + + /* Discover parent */ + t_pb* parent_pb = nullptr; + if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { + t_pb* my_parent = nullptr; + block_pack_status = try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, + &my_parent, max_models, max_cluster_size, cluster_id, + atom_cluster, + cluster_placement_stats_ptr, molecule, router_data, + verbosity, feasible_block_array_size); + parent_pb = my_parent; + } else { + parent_pb = cb; + } + + /* Create siblings if siblings are not allocated */ + VTR_ASSERT(parent_pb != nullptr); + if (parent_pb->child_pbs == nullptr) { + VTR_ASSERT(parent_pb->name == nullptr); + parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; + set_reset_pb_modes(router_data, parent_pb, true); + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; + + for (int i = 0; i < mode->num_pb_type_children; i++) { + parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; + + for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { + parent_pb->child_pbs[i][j].parent_pb = parent_pb; + parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); + } + } + } else { + /* if this is not the first child of this parent, must match existing parent mode */ + 
if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + } + + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + int i; + for (i = 0; i < mode->num_pb_type_children; i++) { + if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { + break; + } + } + VTR_ASSERT(i < mode->num_pb_type_children); + t_pb* pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; + VTR_ASSERT_SAFE(pb != nullptr); + *parent = pb; /* this pb is parent of it's child that called this function */ + VTR_ASSERT(pb->pb_graph_node == pb_graph_node); + if (pb->pb_stats == nullptr) { + alloc_and_load_pb_stats(pb, feasible_block_array_size); + } + const t_pb_type* pb_type = pb_graph_node->pb_type; + + /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping + * Early exit to flag failure + */ + if (true == pb_type->parent_mode->disable_packing) { + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + bool is_primitive = (pb_type->num_modes == 0); + + if (is_primitive) { + VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) + && atom_ctx.lookup.atom_pb(blk_id) == nullptr + && atom_cluster[blk_id] == LegalizationClusterId::INVALID()); + /* try pack to location */ + VTR_ASSERT(pb->name == nullptr); + pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + + //Update the atom netlist mappings + atom_cluster[blk_id] = cluster_id; + // NOTE: This pb is different from the pb of the cluster. It is the pb + // of the actual primitive. + // TODO: It would be a good idea to remove the use of this global + // variables to prevent external users from modifying this by + // mistake. 
+ mutable_atom_ctx.lookup.set_atom_pb(blk_id, pb); + + add_atom_as_target(router_data, blk_id); + if (!primitive_feasible(blk_id, pb)) { + /* failed location feasibility check, revert pack */ + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // if this block passed and is part of a chained molecule + if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { + auto molecule_root_block = molecule->atom_block_ids[molecule->root]; + // if this is the root block of the chain molecule check its placmeent feasibility + if (blk_id == molecule_root_block) { + block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); + } + } + + VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, + "\t\t\tPlaced atom '%s' (%s) at %s\n", + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_model(blk_id)->name, + pb->hierarchical_type_name().c_str()); + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + free(pb->name); + pb->name = nullptr; + } + return block_pack_status; +} + +/* + * @brief Resets nets used at different pin classes for determining pin + * feasibility. 
+ */ +static void reset_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (cur_pb->pb_stats == nullptr) { + return; /* No pins used, no need to continue */ + } + + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); + } + + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/* + * @brief Checks if the sinks of the given net are reachable from the driver + * pb gpin. + */ +static int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + //Record the sink pb graph pins we are looking for + std::unordered_set sink_pb_gpins; + for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { + const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + VTR_ASSERT(sink_pb_gpin); + + sink_pb_gpins.insert(sink_pb_gpin); + } + + //Count how many sink pins are reachable + size_t num_reachable_sinks = 0; + for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { + const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; + + if (sink_pb_gpins.count(reachable_pb_gpin)) { + ++num_reachable_sinks; + if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { + return true; + } + } + } + + return false; +} + +/** + * 
@brief Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb + */ +static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; + int output_port = 0; + // find the port of the pin driving the net as well as the port model + auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); + auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); + // find the port id of the port containing the driving pin in the driver_pb_type + for (int i = 0; i < driver_pb_type->num_ports; i++) { + auto& prim_port = driver_pb_type->ports[i]; + if (prim_port.type == OUT_PORT) { + if (prim_port.model_port == driver_model_port) { + // get the output pb_graph_pin driving this input net + return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); + } + output_port++; + } + } + // the pin should be found + VTR_ASSERT(false); + return nullptr; +} + +/** + * @brief Given a pin and its assigned net, mark all pin classes that are affected. + * Check if connecting this pin to it's driver pin or to all sink pins will + * require leaving a pb_block starting from the parent pb_block of the + * primitive till the root block (depth = 0). If leaving a pb_block is + * required add this net to the pin class (to increment the number of used + * pins from this class) that should be used to leave the pb_block. 
+ */ +static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, + const t_pb* primitive_pb, + const AtomNetId net_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // starting from the parent pb of the input primitive go up in the hierarchy till the root block + for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { + const auto depth = cur_pb->pb_graph_node->pb_type->depth; + const auto pin_class = pb_graph_pin->parent_pin_class[depth]; + VTR_ASSERT(pin_class != OPEN); + + const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); + + // if this primitive pin is an input pin + if (pb_graph_pin->port->type == IN_PORT) { + /* find location of net driver if exist in clb, NULL otherwise */ + // find the driver of the input net connected to the pin being studied + const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + // find the id of the atom occupying the input primitive_pb + const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); + // find the pb block occupied by the driving atom + const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); + // pb_graph_pin driving net_id in the driver pb block + t_pb_graph_pin* output_pb_graph_pin = nullptr; + // if the driver block is in the same clb as the input primitive block + LegalizationClusterId driver_cluster_id = atom_cluster[driver_blk_id]; + LegalizationClusterId prim_cluster_id = atom_cluster[prim_blk_id]; + if (driver_cluster_id == prim_cluster_id) { + // get pb_graph_pin driving the given net + output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); + } + + bool is_reachable = false; + + // if the driver pin is within the cluster + if (output_pb_graph_pin) { + // find if the driver pin can reach the input pin of the primitive or not + const t_pb* check_pb = driver_pb; + while (check_pb && check_pb != cur_pb) { + check_pb = check_pb->parent_pb; + } + if 
(check_pb) { + for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { + if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { + is_reachable = true; + break; + } + } + } + } + + // Must use an input pin to connect the driver to the input pin of the given primitive, either the + // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin + if (!is_reachable) { + // add net to lookahead_input_pins_used if not already added + auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); + if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); + } + } + } else { + VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); + /* + * Determine if this net (which is driven from within this cluster) leaves this cluster + * (and hence uses an output pin). + */ + + bool net_exits_cluster = true; + int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); + + if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { + //It is possible the net is completely absorbed in the cluster, + //since this pin could (potentially) drive all the net's sinks + + /* Important: This runtime penalty looks a lot scarier than it really is. + * For high fan-out nets, I at most look at the number of pins within the + * cluster which limits runtime. + * + * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! + * + * Key Observation: + * For LUT-based designs it is impossible for the average fanout to exceed + * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, + * if the average fanout is greater than the number of LUT inputs, where do + * the extra connections go? 
Therefore, average fanout must be capped to a + * small constant where the constant is equal to the number of LUT inputs). + * The real danger to runtime is when the number of sinks of a net gets doubled + */ + + //Check if all the net sinks are, in fact, inside this cluster + bool all_sinks_in_cur_cluster = true; + LegalizationClusterId driver_cluster = atom_cluster[driver_blk_id]; + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_cluster[sink_blk_id] != driver_cluster) { + all_sinks_in_cur_cluster = false; + break; + } + } + + if (all_sinks_in_cur_cluster) { + //All the sinks are part of this cluster, so the net may be fully absorbed. + // + //Verify this, by counting the number of net sinks reachable from the driver pin. + //If the count equals the number of net sinks then the net is fully absorbed and + //the net does not exit the cluster + /* TODO: I should cache the absorbed outputs, once net is absorbed, + * net is forever absorbed, no point in rechecking every time */ + if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { + //All the sinks are reachable inside the cluster + net_exits_cluster = false; + } + } + } + + if (net_exits_cluster) { + /* This output must exit this cluster */ + cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); + } + } + } +} + + +/* + * @brief Determine if pins of speculatively packed pb are legal + */ +static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + VTR_ASSERT(cur_pb != nullptr); + + /* Walk through inputs, outputs, and clocks marking pins off of the same class */ + for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + + const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, 
atom_ctx.lookup, pin_id); + compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id, atom_cluster); + } +} + +/* + * @brief Determine if speculatively packed cur_pb is pin feasible + * + * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the + * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. + */ +static void try_update_lookahead_pins_used(t_pb* cur_pb, + const vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + // run recursively till a leaf (primitive) pb block is reached + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + if (cur_pb->child_pbs != nullptr) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j], atom_cluster); + } + } + } + } + } else { + // find if this child (primitive) pb block has an atom mapped to it, + // if yes compute and mark lookahead pins used for that pb block + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (pb_type->blif_model != nullptr && blk_id) { + compute_and_mark_lookahead_pins_used(blk_id, atom_cluster); + } + } +} + +/* + * @brief Check if the number of available inputs/outputs for a pin class is + * sufficient for speculatively packed blocks. 
+ */ +static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; + + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster inputs already used should be our constraint. Why is this needed? This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { + return false; + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster outputs already used should be our constraint. Why is this needed? 
This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { + return false; + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) + return false; + } + } + } + } + } + + return true; +} + +/** + * @brief This function takes a chain molecule, and the pb_graph_node that is + * chosen for packing the molecule's root block. Using the given + * root_primitive, this function will identify which chain id this + * molecule is being mapped to and will update the chain id value inside + * the chain info data structure of this molecule. + */ +static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { + VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); + + auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; + + // long chains should only be placed at the beginning of the chain + // Since for long chains the molecule size is already equal to the + // total number of adders in the cluster. Therefore, it should + // always be placed at the very first adder in this cluster. 
+ for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { + if (chain_root_pins[chainId][0]->parent_node == root_primitive) { + chain_molecule->chain_info->chain_id = chainId; + chain_molecule->chain_info->first_packed_molecule = chain_molecule; + return; + } + } + + VTR_ASSERT(false); +} + +/* + * @brief Revert trial atom block iblock and free up memory space accordingly. + */ +static void revert_place_atom_block(const AtomBlockId blk_id, + t_lb_router_data* router_data, + const Prepacker& prepacker, + vtr::vector_map& atom_cluster) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + AtomContext& mutable_atom_ctx = g_vpr_ctx.mutable_atom(); + + //We cast away const here since we may free the pb, and it is + //being removed from the active mapping. + // + //In general most code works fine accessing cosnt t_pb*, + //which is why we store them as such in atom_ctx.lookup + t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); + + if (pb != nullptr) { + /* When freeing molecules, the current block might already have been freed by a prior revert + * When this happens, no need to do anything beyond basic book keeping at the atom block + */ + + t_pb* next = pb->parent_pb; + revalid_molecules(pb, prepacker); + free_pb(pb); + pb = next; + + while (pb != nullptr) { + /* If this is pb is created only for the purposes of holding new molecule, remove it + * Must check if cluster is already freed (which can be the case) + */ + next = pb->parent_pb; + + if (pb->child_pbs != nullptr && pb->pb_stats != nullptr + && pb->pb_stats->num_child_blocks_in_pb == 0) { + set_reset_pb_modes(router_data, pb, false); + if (next != nullptr) { + /* If the code gets here, then that means that placing the initial seed molecule + * failed, don't free the actual complex block itself as the seed needs to find + * another placement */ + revalid_molecules(pb, prepacker); + free_pb(pb); + } + } + pb = next; + } + } + + //Update the atom netlist mapping + atom_cluster[blk_id] = 
LegalizationClusterId::INVALID(); + mutable_atom_ctx.lookup.set_atom_pb(blk_id, nullptr); +} + +/* + * @brief Speculation successful, commit input/output pins used. + */ +static void commit_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); + cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); + cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/** + * @brief Cleans up a pb after unsuccessful molecule packing + * + * Recursively frees pbs from a t_pb tree. The given root pb itself is not + * deleted. + * + * If a pb object has its children allocated then before freeing them the + * function checks if there is no atom that corresponds to any of them. The + * check is performed only for leaf (primitive) pbs. 
The function recurses for + * non-primitive pbs. + * + * The cleaning itself includes deleting all child pbs, resetting mode of the + * pb and also freeing its name. This prepares the pb for another round of + * molecule packing tryout. + */ +static bool cleanup_pb(t_pb* pb) { + bool can_free = true; + + /* Recursively check if there are any children with already assigned atoms */ + if (pb->child_pbs != nullptr) { + const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; + VTR_ASSERT(mode != nullptr); + + /* Check each mode */ + for (int i = 0; i < mode->num_pb_type_children; ++i) { + /* Check each child */ + if (pb->child_pbs[i] != nullptr) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { + t_pb* pb_child = &pb->child_pbs[i][j]; + t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; + + /* Primitive, check occupancy */ + if (pb_type->num_modes == 0) { + if (pb_child->name != nullptr) { + can_free = false; + } + } + + /* Non-primitive, recurse */ + else { + if (!cleanup_pb(pb_child)) { + can_free = false; + } + } + } + } + } + + /* Free if can */ + if (can_free) { + for (int i = 0; i < mode->num_pb_type_children; ++i) { + if (pb->child_pbs[i] != nullptr) { + delete[] pb->child_pbs[i]; + } + } + + delete[] pb->child_pbs; + pb->child_pbs = nullptr; + pb->mode = 0; + + if (pb->name) { + free(pb->name); + pb->name = nullptr; + } + } + } + + return can_free; +} + +e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util) { + // Try to pack the molecule into a cluster with this pb type. + + // Safety debugs. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster.pb != nullptr); + VTR_ASSERT_DEBUG(cluster.type != nullptr); + + // TODO: Remove these global accesses to the contexts. 
+ // AtomContext used for: + // - printing verbose statements + // - Looking up the primitive pb + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + // FloorplanningContext used for: + // - Checking if the atom can be placed in the cluster for floorplanning + // constraints. + const FloorplanningContext& floorplanning_ctx = g_vpr_ctx.floorplanning(); + if (log_verbosity_ > 3) { + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + VTR_LOG("\t\tTry pack molecule: '%s' (%s)", + atom_ctx.nlist.block_name(root_atom).c_str(), + atom_ctx.nlist.block_model(root_atom)->name); + VTR_LOGV(molecule->pack_pattern, + " molecule_type %s molecule_size %zu", + molecule->pack_pattern->name, + molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + // if this cluster has a molecule placed in it that is part of a long chain + // (a chain that consists of more than one molecule), don't allow more long chain + // molecules to be placed in this cluster. To avoid possibly creating cluster level + // blocks that have incompatible placement constraints or form very long placement + // macros that limit placement flexibility. + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster.type->index]); + if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + // Free the allocated data. + return e_block_pack_status::BLK_FAILED_FEASIBLE; + } + + // Check if every atom in the molecule is legal in the cluster from a + // floorplanning perspective + bool cluster_pr_update_check = false; + PartitionRegion new_cluster_pr = cluster.pr; + // TODO: This can be made more efficient by pre-computing the intersection + // of all the atoms' PRs in the molecule. 
+ int molecule_size = get_array_size_of_molecule(molecule); + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + // Try to intersect with atom PartitionRegion if atom exists + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool cluster_pr_needs_update = false; + bool block_pack_floorplan_status = check_cluster_floorplanning(atom_blk_id, + new_cluster_pr, + floorplanning_ctx.constraints, + log_verbosity_, + cluster_pr_needs_update); + if (!block_pack_floorplan_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_FLOORPLANNING; + } + + if (cluster_pr_needs_update) { + cluster_pr_update_check = true; + } + } + } + + // Check if all atoms in the molecule can be added to the cluster without + // NoC group conflicts + NocGroupId new_cluster_noc_grp_id = cluster.noc_grp_id; + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (atom_blk_id) { + bool block_pack_noc_grp_status = check_cluster_noc_group(atom_blk_id, + new_cluster_noc_grp_id, + atom_noc_grp_id_, + log_verbosity_); + if (!block_pack_noc_grp_status) { + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + return e_block_pack_status::BLK_FAILED_NOC_GROUP; + } + } + } + + std::vector primitives_list(max_molecule_size_, nullptr); + e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; + while (block_pack_status != e_block_pack_status::BLK_PASSED) { + if (!get_next_primitive_list(cluster_placement_stats_ptr, + molecule, + primitives_list.data())) { + VTR_LOGV(log_verbosity_ > 3, "\t\tFAILED No candidate primitives available\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + break; /* no more candidate primitives available, this molecule will not pack, return fail */ + } + + block_pack_status 
= e_block_pack_status::BLK_PASSED; + int failed_location = 0; + for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { + VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); + failed_location = i_mol + 1; + AtomBlockId atom_blk_id = molecule->atom_block_ids[i_mol]; + if (!atom_blk_id.is_valid()) + continue; + // NOTE: This parent variable is only used in the recursion of this + // function. + t_pb* parent = nullptr; + block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], + atom_blk_id, + cluster.pb, + &parent, + num_models_, + max_cluster_size_, + cluster_id, + atom_cluster_, + cluster_placement_stats_ptr, + molecule, + cluster.router_data, + log_verbosity_, + feasible_block_array_size_); + } + + if (enable_pin_feasibility_filter_ && block_pack_status == e_block_pack_status::BLK_PASSED) { + // Check if pin usage is feasible for the current packing assignment + reset_lookahead_pins_used(cluster.pb); + try_update_lookahead_pins_used(cluster.pb, atom_cluster_); + if (!check_lookahead_pins_used(cluster.pb, max_external_pin_util)) { + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\t\tPin Feasibility: Passed pin feasibility filter\n"); + } + } + + if (block_pack_status == e_block_pack_status::BLK_PASSED) { + /* + * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster + * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). + * depending on its value we have different behaviors: + * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. + * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. 
If the clusterer arrives at this stage, + * it means that more checks have to be performed as the previous stage failed to generate a new cluster. + * + * mode_status is a data structure containing the status of the mode selection. Its members are: + * - bool is_mode_conflict + * - bool try_expand_all_modes + * - bool expand_all_modes + * + * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. + * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. + * + * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted + * an error will be thrown during mode conflicts checks (this to prevent infinite loops). + * + * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices + * for what regards the mode that has to be selected. + * + * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. + * + * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route + * by using all the modes during node expansion. + * + * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
+ */ + t_mode_selection_status mode_status; + bool is_routed = false; + bool do_detailed_routing_stage = (cluster_legalization_strategy_ == ClusterLegalizationStrategy::FULL); + if (do_detailed_routing_stage) { + do { + reset_intra_lb_route(cluster.router_data); + is_routed = try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); + } while (do_detailed_routing_stage && mode_status.is_mode_issue()); + } + + if (do_detailed_routing_stage && !is_routed) { + /* Cannot pack */ + VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Detailed Routing Legality\n"); + block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; + } else { + /* Pack successful, commit + * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside + */ + VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); + if (molecule->is_chain()) { + /* Chained molecules often take up lots of area and are important, + * if a chain is packed in, want to rename logic block to match chain name */ + AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; + t_pb* cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + while (cur_pb != nullptr) { + free(cur_pb->name); + cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + cur_pb = cur_pb->parent_pb; + } + // if this molecule is part of a chain, mark the cluster as having a long chain + // molecule. Also check if it's the first molecule in the chain to be packed. 
+ // If so, update the chain id for this chain of molecules to make sure all + // molecules will be packed to the same chain id and can reach each other using + // the chain direct links between clusters + if (molecule->chain_info->is_long_chain) { + cluster_placement_stats_ptr->has_long_chain = true; + if (molecule->chain_info->chain_id == -1) { + update_molecule_chain_info(molecule, primitives_list[molecule->root]); + } + } + } + + //update cluster PartitionRegion if atom with floorplanning constraints was added + if (cluster_pr_update_check) { + cluster.pr = new_cluster_pr; + VTR_LOGV(log_verbosity_ > 2, "\nUpdated PartitionRegion of cluster\n"); + } + + // Update the cluster's NoC group ID. This is cheap so it does + // not need the check like the what the PR did above. + cluster.noc_grp_id = new_cluster_noc_grp_id; + + // Insert the molecule into the cluster for bookkeeping. + cluster.molecules.push_back(molecule); + + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (!atom_blk_id.is_valid()) + continue; + + /* invalidate all molecules that share atom block with current molecule */ + t_pack_molecule* cur_molecule = prepacker_.get_atom_molecule(atom_blk_id); + // TODO: This should really be named better. Something like + // "is_clustered". and then it should be set to true. + // Right now, valid implies "not clustered" which is + // confusing. + cur_molecule->valid = false; + + commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); + + atom_cluster_[atom_blk_id] = cluster_id; + + // Update the num child blocks in pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom_blk_id); + VTR_ASSERT_SAFE(atom_pb != nullptr); + t_pb* cur_pb = atom_pb->parent_pb; + while (cur_pb != nullptr) { + cur_pb->pb_stats->num_child_blocks_in_pb++; + cur_pb = cur_pb->parent_pb; + } + } + + // Update the lookahead pins used. 
+ commit_lookahead_pins_used(cluster.pb); + } + } + + if (block_pack_status != e_block_pack_status::BLK_PASSED) { + /* Pack unsuccessful, undo inserting molecule into cluster */ + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + remove_atom_from_target(cluster.router_data, atom_blk_id); + } + } + for (int i = 0; i < failed_location; i++) { + AtomBlockId atom_blk_id = molecule->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + + // Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, cluster.pb); + + /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. + * Before trying to pack next molecule the unused pbs need to be freed and, the most important, + * their modes reset. This task is performed by the cleanup_pb() function below. */ + cleanup_pb(cluster.pb); + } else { + VTR_LOGV(log_verbosity_ > 3, "\t\tPASSED pack molecule\n"); + } + } + return block_pack_status; +} + +std::tuple +ClusterLegalizer::start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode) { + // Safety asserts to ensure the API is being called with valid arguments. + VTR_ASSERT_DEBUG(molecule != nullptr); + VTR_ASSERT_DEBUG(cluster_type != nullptr); + VTR_ASSERT_DEBUG(cluster_mode < cluster_type->pb_graph_head->pb_type->num_modes); + // Ensure that the molecule has not already been placed. + VTR_ASSERT_SAFE(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + // Create the physical block for this cluster based on the type. 
+ t_pb* cluster_pb = new t_pb; + cluster_pb->pb_graph_node = cluster_type->pb_graph_head; + alloc_and_load_pb_stats(cluster_pb, feasible_block_array_size_); + cluster_pb->parent_pb = nullptr; + cluster_pb->mode = cluster_mode; + + // Allocate and load the LB router data + t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs_[cluster_type->index], + cluster_type); + + // Reset the cluster placement stats + t_cluster_placement_stats* cluster_placement_stats_ptr = &(cluster_placement_stats_[cluster_type->index]); + reset_cluster_placement_stats(cluster_placement_stats_ptr); + set_mode_cluster_placement_stats(cluster_pb->pb_graph_node, cluster_pb->mode); + + // Create the new cluster + LegalizationCluster new_cluster; + new_cluster.pb = cluster_pb; + new_cluster.router_data = router_data; + new_cluster.pr = PartitionRegion(); + new_cluster.noc_grp_id = NocGroupId::INVALID(); + new_cluster.type = cluster_type; + + // Try to pack the molecule into the new_cluster. + // When starting a new cluster, we set the external pin utilization to full + // (meaning all cluster pins are allowed to be used). + const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); + LegalizationClusterId new_cluster_id = LegalizationClusterId(legalization_cluster_ids_.size()); + e_block_pack_status pack_status = try_pack_molecule(molecule, + new_cluster, + new_cluster_id, + FULL_EXTERNAL_PIN_UTIL); + + if (pack_status == e_block_pack_status::BLK_PASSED) { + // Give the new cluster pb a name. The current convention is to name the + // cluster after the root atom of the first molecule packed into it. + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + const std::string& root_atom_name = atom_nlist.block_name(root_atom); + if (new_cluster.pb->name != nullptr) + free(new_cluster.pb->name); + new_cluster.pb->name = vtr::strdup(root_atom_name.c_str()); + // Move the cluster into the vector of clusters and ids. 
+ legalization_cluster_ids_.push_back(new_cluster_id); + legalization_clusters_.push_back(std::move(new_cluster)); + // Update the molecule to cluster map. + molecule_cluster_[molecule] = new_cluster_id; + } else { + // Delete the new_cluster. + free_pb(new_cluster.pb); + delete new_cluster.pb; + free_router_data(new_cluster.router_data); + new_cluster_id = LegalizationClusterId::INVALID(); + } + + return {pack_status, new_cluster_id}; +} + +e_block_pack_status ClusterLegalizer::add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot add to a destroyed cluster"); + // Ensure that the molecule has not already been placed. + VTR_ASSERT(molecule_cluster_.find(molecule) == molecule_cluster_.end() || + !molecule_cluster_[molecule].is_valid()); + // Safety asserts to ensure that the API was initialized properly. + VTR_ASSERT_DEBUG(cluster_placement_stats_ != nullptr && + lb_type_rr_graphs_ != nullptr); + + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Cannot add molecule to cleaned cluster!"); + // Set the target_external_pin_util. + t_ext_pin_util target_ext_pin_util = target_external_pin_util_.get_pin_util(cluster.type->name); + // Try to pack the molecule into the cluster. + e_block_pack_status pack_status = try_pack_molecule(molecule, + cluster, + cluster_id, + target_ext_pin_util); + + // If the packing was successful, set the molecules' cluster to this one. + if (pack_status == e_block_pack_status::BLK_PASSED) + molecule_cluster_[molecule] = cluster_id; + + return pack_status; +} + +void ClusterLegalizer::destroy_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. 
+ VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + VTR_ASSERT(legalization_cluster_ids_[cluster_id].is_valid() && "Cannot destroy an already destroyed cluster"); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + // Remove all molecules from the cluster. + for (t_pack_molecule* mol : cluster.molecules) { + VTR_ASSERT_SAFE(molecule_cluster_.find(mol) != molecule_cluster_.end() && + molecule_cluster_[mol] == cluster_id); + molecule_cluster_[mol] = LegalizationClusterId::INVALID(); + // The overall clustering algorithm uses this valid flag to indicate + // that a molecule has not been packed (clustered) yet. Since we are + // destroying a cluster, all of its molecules are now no longer clustered + // so they are all validated. + mol->valid = true; + // Revert the placement of all blocks in the molecule. + int molecule_size = get_array_size_of_molecule(mol); + for (int i = 0; i < molecule_size; i++) { + AtomBlockId atom_blk_id = mol->atom_block_ids[i]; + if (atom_blk_id) { + revert_place_atom_block(atom_blk_id, cluster.router_data, prepacker_, atom_cluster_); + } + } + } + cluster.molecules.clear(); + // Free the rest of the cluster data. + // Casting things to nullptr for safety just in case someone is trying to use it. + free_pb(cluster.pb); + delete cluster.pb; + cluster.pb = nullptr; + free_router_data(cluster.router_data); + cluster.router_data = nullptr; + cluster.pr = PartitionRegion(); + + // Mark the cluster as invalid. + legalization_cluster_ids_[cluster_id] = LegalizationClusterId::INVALID(); +} + +void ClusterLegalizer::compress() { + // Create a map from the old ids to the new (compressed) one. + vtr::vector_map cluster_id_map; + cluster_id_map = compress_ids(legalization_cluster_ids_); + // Update all cluster values. 
+ legalization_cluster_ids_ = clean_and_reorder_ids(cluster_id_map); + legalization_clusters_ = clean_and_reorder_values(legalization_clusters_, cluster_id_map); + // Update the reverse lookups. + for (auto& it : molecule_cluster_) { + if (!it.second.is_valid()) + continue; + molecule_cluster_[it.first] = cluster_id_map[it.second]; + } + for (size_t i = 0; i < atom_cluster_.size(); i++) { + AtomBlockId atom_blk_id = AtomBlockId(i); + LegalizationClusterId old_cluster_id = atom_cluster_[atom_blk_id]; + if (!old_cluster_id.is_valid()) + continue; + atom_cluster_[atom_blk_id] = cluster_id_map[old_cluster_id]; + } + // Shrink everything to fit + legalization_cluster_ids_.shrink_to_fit(); + legalization_clusters_.shrink_to_fit(); + atom_cluster_.shrink_to_fit(); +} + +void ClusterLegalizer::clean_cluster(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // Get the cluster. + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + VTR_ASSERT(cluster.router_data != nullptr && "Should not clean an already cleaned cluster!"); + // Free the pb stats. + free_pb_stats_recursive(cluster.pb); + // Load the pb_route so we can free the cluster router data. + // The pb_route is used when creating a netlist from the legalized clusters. + std::vector* saved_lb_nets = cluster.router_data->saved_lb_nets; + t_pb_graph_node* pb_graph_node = cluster.pb->pb_graph_node; + cluster.pb->pb_route = alloc_and_load_pb_route(saved_lb_nets, pb_graph_node); + // Free the router data. + free_router_data(cluster.router_data); + cluster.router_data = nullptr; +} + +// TODO: This is fine for the current implementation of the legalizer. But if +// more complex strategies are added, this will need to be updated to +// check more than just routing (such as PR and NoC groups). 
+bool ClusterLegalizer::check_cluster_legality(LegalizationClusterId cluster_id) { + // Safety asserts to make sure the inputs are valid. + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + // To check if a cluster is fully legal, try to perform an intra logic block + // route on the cluster. If it succeeds, the cluster is fully legal. + t_mode_selection_status mode_status; + LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return try_intra_lb_route(cluster.router_data, log_verbosity_, &mode_status); +} + +ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity) : prepacker_(prepacker) { + // Verify that the inputs are valid. + VTR_ASSERT_SAFE(lb_type_rr_graphs != nullptr); + + // Get the target external pin utilization + // NOTE: This has to be initialized first due to the fact that VPR_FATAL_ERROR + // may be called within the constructor of t_ext_pin_util_targets. If + // this occurs, the destructor may or may not be called (honestly I have + // no idea why it does or does not, but it changes based on how VPR + // is compiled...). If the destructor is not called, it is important + // that nothing was allocated before this line is called. If the + // destructor is called, we just need to be careful of double freeing + // (check if the allocated member variables are nullptr). + // FIXME: This can be fixed by removing all allocations from the constructor + // (see cluster_placement_stats_).
+ target_external_pin_util_ = t_ext_pin_util_targets(target_external_pin_util_str); + + // Resize the atom_cluster lookup to make the accesses much cheaper. + atom_cluster_.resize(atom_netlist.blocks().size(), LegalizationClusterId::INVALID()); + // Allocate the cluster_placement_stats + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); + // Pre-compute the max size of any molecule. + max_molecule_size_ = prepacker.get_max_molecule_size(); + // Calculate the max cluster size + // - Limit maximum number of elements for each cluster to MAX_SHORT + max_cluster_size_ = calc_max_cluster_size(logical_block_types); + VTR_ASSERT(max_cluster_size_ < MAX_SHORT); + // Get a reference to the rr graphs. + lb_type_rr_graphs_ = lb_type_rr_graphs; + // Get the number of models in the architecture. + num_models_ = num_models; + // Find all NoC router atoms. + std::vector noc_atoms = find_noc_router_atoms(atom_netlist); + update_noc_reachability_partitions(noc_atoms, + atom_netlist, + high_fanout_thresholds, + atom_noc_grp_id_); + // Copy the options passed by the user + cluster_legalization_strategy_ = cluster_legalization_strategy; + enable_pin_feasibility_filter_ = enable_pin_feasibility_filter; + feasible_block_array_size_ = feasible_block_array_size; + log_verbosity_ = log_verbosity; +} + +void ClusterLegalizer::reset() { + // Destroy all of the clusters and compress. + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + compress(); + // Reset the molecule_cluster map + molecule_cluster_.clear(); + // Reset the cluster placement stats. 
+ free_cluster_placement_stats(cluster_placement_stats_); + cluster_placement_stats_ = alloc_and_load_cluster_placement_stats(); +} + +void ClusterLegalizer::verify() { + std::unordered_set atoms_checked; + auto& atom_ctx = g_vpr_ctx.atom(); + + if (clusters().size() == 0) { + VTR_LOG_WARN("Packing produced no clustered blocks"); + } + + /* + * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb + */ + for (auto blk_id : atom_ctx.nlist.blocks()) { + //Each atom should be part of a pb + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); + if (!atom_pb) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s is not mapped to a pb\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + //Check the reverse mapping is consistent + if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "pb %s does not contain atom block %s but atom block %s maps to pb.\n", + atom_pb->name, + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); + + const t_pb* cur_pb = atom_pb; + while (cur_pb->parent_pb) { + cur_pb = cur_pb->parent_pb; + VTR_ASSERT(cur_pb->name); + } + + LegalizationClusterId cluster_id = get_atom_cluster(blk_id); + if (cluster_id == LegalizationClusterId::INVALID()) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom %s is not mapped to a CLB\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + + if (cur_pb != get_cluster_pb(cluster_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "CLB %s does not match CLB contained by pb %s.\n", + cur_pb->name, atom_pb->name); + } + } + + /* Check that I do not have spurious links in children pbs */ + for (LegalizationClusterId cluster_id : clusters()) { + if (!cluster_id.is_valid()) + continue; + check_cluster_atom_blocks(get_cluster_pb(cluster_id), + atoms_checked); + } + + for (auto blk_id : atom_ctx.nlist.blocks()) { + if 
(!atoms_checked.count(blk_id)) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Atom block %s not found in any cluster.\n", + atom_ctx.nlist.block_name(blk_id).c_str()); + } + } +} + +void ClusterLegalizer::finalize() { + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + // If the cluster has not already been cleaned, clean it. This will + // generate the pb_route necessary for generating a clustered netlist. + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + if (cluster.router_data != nullptr) + clean_cluster(cluster_id); + } +} + +ClusterLegalizer::~ClusterLegalizer() { + // Destroy all clusters (no need to compress). + for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { + if (!cluster_id.is_valid()) + continue; + destroy_cluster(cluster_id); + } + // Free the cluster_placement_stats + if (cluster_placement_stats_ != nullptr) + free_cluster_placement_stats(cluster_placement_stats_); +} + diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h new file mode 100644 index 00000000000..e3aee27be57 --- /dev/null +++ b/vpr/src/pack/cluster_legalizer.h @@ -0,0 +1,559 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief The declaration of the Cluster Legalizer class. + * + * This file declares a class called the ClusterLegalizer which encapsulates all + * logic for creating legal clusters from prepacked molecules. This class is + * designed to be self-contained to the point that it is able to be called + * externally to the Packer in VPR. + */ + +#pragma once + +#include +#include +#include "atom_netlist_fwd.h" +#include "noc_data_types.h" +#include "partition_region.h" +#include "vpr_types.h" +#include "vtr_range.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +class Prepacker; +class t_pb_graph_node; +struct t_lb_router_data; + +// A special ID to identify the legalization clusters. 
This is separate from the +// ClusterBlockId since this legalizer is not necessarily tied to the Clustered +// netlist, but is used as a sub-routine to it. +struct legalization_cluster_id_tag; +typedef vtr::StrongId LegalizationClusterId; + +/// @brief The different legalization strategies the cluster legalizer can perform. +/// +/// Allows the user of the API to select how thorough the legalizer should be +/// when adding molecules into clusters. +enum class ClusterLegalizationStrategy { + FULL, // Run the full legalizer (including intra-lb routing) + SKIP_INTRA_LB_ROUTE // Do all legality checks except intra-lb routing +}; + +/// @brief The status of the cluster legalization. +enum class e_block_pack_status { + BLK_PASSED, // Passed legalization. + BLK_FAILED_FEASIBLE, // Failed due to block not feasibly being able to go in the cluster. + BLK_FAILED_ROUTE, // Failed due to intra-lb routing failure. + BLK_FAILED_FLOORPLANNING, // Failed due to not being compatible with the cluster's current PartitionRegion. + BLK_FAILED_NOC_GROUP, // Failed due to not being compatible with the cluster's NoC group. + BLK_STATUS_UNDEFINED // Undefined status. Something went wrong. +}; + +/* + * @brief A struct containing information about the cluster. + * + * This contains necessary information for legalizing a cluster. + */ +struct LegalizationCluster { + /// @brief A list of the molecules in the cluster. By design, a cluster will + /// only contain molecules which have been previously legalized into + /// the cluster using a legalization strategy. + std::vector molecules; + + /// @brief The logical block of this cluster. + /// TODO: We should be more careful with how this is allocated. Instead of + /// pointers, we really should use IDs and store them in a standard + /// container. 
Currently this is being allocated with the new keyword + /// and freed when the cluster is destroyed; however this is used + /// externally to the class and it can be dangerous to pass around + /// a pointer to this object. + t_pb* pb; + + /// @brief The logical block type this cluster represents. + t_logical_block_type_ptr type; + + /// @brief The partition region of legal positions this cluster can be placed. + /// Used to detect if a molecule can physically be placed in a cluster. + /// It is derived from the partition region constraints on the atoms + /// in the cluster (not fundamental but good for performance). + PartitionRegion pr; + + /// @brief The NoC group that this cluster is a part of. Is used to check if + /// a candidate primitive is in the same NoC group as the atom blocks + /// that have already been added to the primitive. This can be helpful + /// for optimization. + NocGroupId noc_grp_id; + + /// @brief The router data of the intra lb router used for this cluster. + /// Contains information about the atoms in the cluster and how they + /// can be routed within. + t_lb_router_data* router_data; +}; + +/* + * @brief A manager class which manages the legalization of clusters. As clusters + * are created, this class will legalize for each molecule added. It also + * provides methods which are helpful for clustering. + * + * Usage: + * The ClusterLegalizer class maintains the clusters within itself since the + * legalization of a cluster depends on the molecules which have already been + * inserted into the clusters prior. + * + * The class provides different legalization strategies the user may use to + * legalize: + * 1) SKIP_INTRA_LB_ROUTE + * 2) FULL + * + * 1) SKIP_INTRA_LB_ROUTE Legalization Strategy Example: + * This strategy will not fully route the internal connections of the clusters + * until the user requests it. An example of how to use this strategy would + * look something like this.
Note, this example is simplified and the result + * of the packings should be checked and handled. + * + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + * ...); + * + * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Cheaper additions, but may pack a molecule that wouldn't route. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // Do the expensive check once all molecules are in. + * if (!legalizer.check_cluster_legality(new_cluster_id)) + * // Destroy the illegal cluster. + * legalizer.destroy_cluster(new_cluster_id); + * // Clean-up the internal bookkeeping of the class (required after + * // destroying a cluster). + * legalizer.compress(); + * // Handle how to try again (maybe use FULL strategy). + * + * 2) FULL Legalization Strategy Example: + * This strategy will fully route the internal connections of the clusters for + * each molecule added. This is much more expensive to run; however, it will ensure + * that the cluster is fully legalized while it is being created. An example + * of how to use this strategy would look something like this: + * + * ClusterLegalizer legalizer(..., + * ClusterLegalizationStrategy::FULL, + * ...); + * + * std::tie(status, new_cluster_id) = legalizer.start_new_cluster(seed_mol, + * cluster_type, + * mode); + * for mol in molecules_to_add: + * // Do the expensive check for each molecule added. + * status = legalizer.add_mol_to_cluster(mol, new_cluster_id); + * if (status != e_block_pack_status::BLK_PASSED) + * break; + * + * // new_cluster_id now contains a fully legalized cluster.
+ */ +class ClusterLegalizer { +public: + // Iterator for the legalization cluster IDs + typedef typename vtr::vector_map::const_iterator cluster_iterator; + + // Range for the legalization cluster IDs + typedef typename vtr::Range cluster_range; + +private: + + /* + * @brief Helper method that tries to pack the given molecule into a cluster. + * + * This method runs all the legality checks specified by the legalization + * strategy. If the molecule can be packed into the cluster, it will insert + * it into the cluster. + * + * @param molecule The molecule to insert into the cluster. + * @param cluster The cluster to try to insert the molecule into. + * @param cluster_id The ID of the cluster. + * @param max_external_pin_util The max external pin utilization for a + * cluster of this type. + */ + e_block_pack_status try_pack_molecule(t_pack_molecule* molecule, + LegalizationCluster& cluster, + LegalizationClusterId cluster_id, + const t_ext_pin_util& max_external_pin_util); + +public: + + // Explicitly deleted default constructor. Need to use other constructor to + // initialize state correctly. + ClusterLegalizer() = delete; + + /* + * @brief Initialize the ClusterLegalizer class. + * + * Allocates internal state. + * + * @param atom_netlist The complete atom netlist. Used to allocate + * internal structures to the correct size. + * @param prepacker The prepacker object used to prepack the atoms + * into molecules. A reference to this object is + * stored internally to be used to lookup the + * molecules of atoms. + * @param logical_block_types Used to allocate internal objects. Used to + * get the max number of primitives in any block + * type. + * @param lb_type_rr_graphs The routing resource graph internal to the + * different cluster types. A reference is stored + * in the class to be used to allocate and load + * the router data. + * @param num_models The total number of models in the architecture. 
+ * This is the sum of the number of the user and + * library models. Used internally to allocate data + * structures. + * @param target_external_pin_util_str A string used to initialize the + * target external pin utilization of + * each cluster type. + * @param high_fanout_thresholds An object that stores the thresholds for + * a net to be considered high fanout for + * different block types. + * @param cluster_legalization_strategy The legalization strategy to be + * used when creating clusters and + * adding molecules to clusters. + * Controls the checks that are performed. + * @param enable_pin_feasibility_filter A flag to turn on/off the check for + * pin usage feasibility. + * @param feasible_block_array_size The largest number of feasible blocks + * that can be stored in a cluster. Used + * to allocate an internal structure. + * @param log_verbosity Controls how verbose the log messages will + * be within this class. + * + * TODO: A lot of these arguments are only used to allocate C-style arrays + * since the original author was avoiding dynamic allocations. It may + * be more space efficient (and cleaner) to make these dynamic arrays + * and not pass these arguments in. + */ + ClusterLegalizer(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + size_t num_models, + const std::vector& target_external_pin_util_str, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + ClusterLegalizationStrategy cluster_legalization_strategy, + bool enable_pin_feasibility_filter, + int feasible_block_array_size, + int log_verbosity); + + // This class allocates and deallocates memory within. This class should not + // be copied or moved to prevent it from double freeing / losing pointers. 
+ ClusterLegalizer(const ClusterLegalizer&) = delete; + ClusterLegalizer& operator=(const ClusterLegalizer&) = delete; + + /* + * @brief Start a new legalization cluster with the given molecule. + * + * @param molecule The seed molecule used to start the new cluster. + * @param cluster_type The type of the cluster to start. + * @param cluster_mode The mode of the new cluster for the given type. + * + * @return A tuple of the status of the packing and the ID of the new + * cluster. If the new cluster could not be created, the pack + * status will return the reason and the ID would be invalid. + */ + std::tuple + start_new_cluster(t_pack_molecule* molecule, + t_logical_block_type_ptr cluster_type, + int cluster_mode); + + /* + * @brief Add an unclustered molecule to the given legalization cluster. + * + * The ClusterLegalizationStrategy (set either in the constructor or by the + * set_legalization_strategy method) decides what checks are + * performed when adding a molecule to the cluster. + * + * If the addition was unsuccessful (i.e. a check fails), the molecule will + * remain unclustered. + * + * @param molecule The molecule to add to the cluster. + * @param cluster_id The ID of the cluster to add the molecule to. + * + * @return The status of the pack (if the addition was successful and + * if not why). + */ + e_block_pack_status add_mol_to_cluster(t_pack_molecule* molecule, + LegalizationClusterId cluster_id); + + /* + * @brief Destroy the given cluster. + * + * This unclusters all molecules in the cluster so they can be re-clustered + * into different clusters. Should call the compress() method after destroying + * one or more clusters. + * + * @param cluster_id The ID of the cluster to destroy. + */ + void destroy_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Compress the internal storage of clusters. Should be called + * eventually after one or more clusters are destroyed. + * + * Similar to the Netlist compress method.
Will invalidate all Legalization + * Cluster IDs. + * + * This method can be quite expensive, so it is a good idea to batch many + * cluster destructions and then compress at the end. + */ + void compress(); + + /* + * @brief A range of all cluster IDs in the legalizer. + * + * If the legalizer has been compressed (or no clusters have been destroyed) + * then all cluster IDs in this list will be valid and represent a non-empty + * legalization cluster. + */ + cluster_range clusters() const { + return vtr::make_range(legalization_cluster_ids_.begin(), legalization_cluster_ids_.end()); + } + + /* + * @brief Check that the given cluster is fully legal. + * + * This method runs an intra_lb_route on the given cluster. This ignores + * the cluster legalization strategy set by the user. This method will not + * correct the problematic molecules, it will only return true if the + * cluster is legal and false if it is not. + * + * @param cluster_id The ID of the cluster to check. + * + * @return True if the cluster is legal, false otherwise. + */ + bool check_cluster_legality(LegalizationClusterId cluster_id); + + /* + * @brief Cleans the cluster of unnecessary data, reducing the memory footprint. + * + * After this function is called, no more molecules can be added to the + * cluster. This method will ensure that the cluster has enough information + * to generate a clustered netlist from the legalized clusters. + * + * Specifically, this frees the pb stats (which is used by the clusterer + * to compute the gain) and the router data of the cluster. + * + * TODO: The pb stats should really not be calculated or stored in the + * cluster legalizer. + * + * @param cluster_id The ID of the cluster to clean. + */ + void clean_cluster(LegalizationClusterId cluster_id); + + /* + * @brief Verify that all atoms have been clustered into some cluster. + * + * This will not verify if all the clusters are fully legal.
+ */ + void verify(); + + /* + * @brief Finalize the clustering. Required for generating a Clustered + * Netlist. + * + * Before generating a Clustered Netlist, each cluster needs to allocate and + * load a pb_route. This method will generate a pb_route for each cluster + * and store it into the clusters' pb. + */ + void finalize(); + + /* + * @brief Resets the legalizer to its initial state. + * + * Destroys all clusters and resets the cluster placement stats. + */ + void reset(); + + /// @brief Gets the top-level pb of the given cluster. + inline t_pb* get_cluster_pb(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pb; + } + + /// @brief Gets the logical block type of the given cluster. + inline t_logical_block_type_ptr get_cluster_type(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.type; + } + + /// @brief Gets the current partition region (the intersection of all + /// contained atoms) of the given cluster. + inline const PartitionRegion& get_cluster_pr(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + return cluster.pr; + } + + /// @brief Gets the ID of the cluster that contains the given atom block. + inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const { + VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size()); + return atom_cluster_[blk_id]; + } + + /// @brief Gets the cluster placement stats of the given cluster. 
+ /// + /// The cluster placement stats are statistics used to monitor which atoms + /// have been physically clustered into the pb (more specifically what site + /// they will go). This can be used externally to the legalizer to detect + /// if an atom could physically go into a cluster (exists_free_primitive_for_atom_block). + /// + /// TODO: Releasing the whole stats can be dangerous. Ideally there should + /// just be a method to see if an atom could physically go in a cluster. + inline t_cluster_placement_stats* get_cluster_placement_stats(LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + return &(cluster_placement_stats_[get_cluster_type(cluster_id)->index]); + } + + /// @brief Returns true if the given atom block has been packed into a + /// cluster, false otherwise. + inline bool is_atom_clustered(AtomBlockId blk_id) const { + // Simply, if the atom is not in an invalid cluster, it has been clustered. + return get_atom_cluster(blk_id) != LegalizationClusterId::INVALID(); + } + + /// @brief Returns a reference to the target_external_pin_util object. This + /// allows the user to modify the external pin utilization if needed. + inline t_ext_pin_util_targets& get_target_external_pin_util() { + return target_external_pin_util_; + } + + /// @brief Gets the max size a cluster could physically be. + /// + /// This is the maximum number of primitives any cluster could ever have + /// in the architecture. + inline size_t get_max_cluster_size() const { + return max_cluster_size_; + } + + /* + * @brief Set the legalization strategy of the cluster legalizer. + * + * This allows the strategy of the cluster legalizer to change based on the + * needs of the user. For example, one can set the legalizer to use a more + * relaxed strategy to insert a batch of molecules in cheaply, saving the + * full legalization for the end (using check_cluster_legality).
+ * + * @param strategy The strategy to set the cluster legalizer to. + */ + inline void set_legalization_strategy(ClusterLegalizationStrategy strategy) { + cluster_legalization_strategy_ = strategy; + } + + /* + * @brief Set how verbose the log messages should be for the cluster legalizer. + * + * This allows the user to set the verbosity at different points for easier + * usability. + * + * Set the verbosity to 4 to see most of the log messages on how the + * molecules move through the legalizer. + * Set the verbosity to 5 to see all the log messages in the legalizer. + * + * @param verbosity The value to set the verbosity to. + */ + inline void set_log_verbosity(int verbosity) { + log_verbosity_ = verbosity; + } + + /// @brief Destructor of the class. Frees allocated data. + ~ClusterLegalizer(); + +private: + /// @brief A vector of the legalization cluster IDs. If any of them are + /// invalid, then that means that the cluster has been destroyed. + vtr::vector_map legalization_cluster_ids_; + + /// @brief Lookup table for which cluster each molecule is in. + std::unordered_map molecule_cluster_; + + /// @brief List of all legalization clusters. + vtr::vector_map legalization_clusters_; + + /// @brief A lookup-table for which cluster the given atom is packed into. + vtr::vector_map atom_cluster_; + + /// @brief Stores the NoC group ID of each atom block. Atom blocks that + /// belong to different NoC groups can't be clustered with each other + /// into the same clustered block. Under some optimization settings + /// to improve placement locality / NoC usage. Atoms with different + /// NoC group IDs belong to logic that is disjoint except through + /// NoC traffic. + vtr::vector atom_noc_grp_id_; + + /// @brief Stats keeper for placement information during packing/clustering. + /// TODO: This should be a vector. + /// FIXME: This keeps the stats for each cluster type. 
This is fine within + /// the clusterer, however it yields a limitation where two clusters + /// of the same type cannot be constructed at the same time. This + /// should be stored per cluster. + t_cluster_placement_stats* cluster_placement_stats_ = nullptr; + + /// @brief The maximum fractional utilization of cluster external + /// input/output pins during packing (between 0 and 1). + t_ext_pin_util_targets target_external_pin_util_; + + /// @brief The max size of any molecule. This is used to allocate a dynamic + /// array within the legalizer, and in its current form this is a bit + /// expensive to calculate from the prepacker. + size_t max_molecule_size_; + + /// @brief The max number of primitives a cluster could physically have. + /// This is used to allocate dynamic arrays. + size_t max_cluster_size_; + + /// @brief A vector of routing resource nodes within each logical block type + /// [0 .. num_logical_block_types-1] + /// TODO: This really should not be a pointer to a vector... I think this is + /// meant to be a vector of vectors... + std::vector* lb_type_rr_graphs_ = nullptr; + + /// @brief The total number of models (user + library) in the architecture. + /// Used to allocate space in dynamic data structures. + size_t num_models_; + + /// @brief The current legalization strategy of the cluster legalizer. + ClusterLegalizationStrategy cluster_legalization_strategy_; + + /// @brief Controls whether the pin counting feasibility filter is used + /// during clustering. When enabled the clustering engine counts the + /// number of available pins in groups/classes of mutually connected + /// pins within a cluster. These counts are used to quickly filter + /// out candidate primitives/atoms/molecules for which the cluster + /// has insufficient pins to route (without performing a full + /// routing). This reduces packing run-time. This matches the packer + /// option of the same name.
+ bool enable_pin_feasibility_filter_; + + /// @brief The max size of the priority queue for candidates that pass the + /// early filter legality test but not the more detailed routing + /// filter. This matches the packer option of the same name. + int feasible_block_array_size_; + + /// @brief Used to set the verbosity of log messages in the legalizer. Used + /// for debugging. When log_verbosity > 3, the legalizer will print + /// messages when a molecule is successful during legalization. When + /// log_verbosity is > 4, the legalizer will print when a molecule + /// fails a legality check. This parameter is also passed into the + /// intra-lb router. + int log_verbosity_; + + /// @brief The prepacker object that stores the molecules which will be + /// legalized into clusters. + const Prepacker& prepacker_; +}; + diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8fd0bcfa56f..39940410b40 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -1,11 +1,13 @@ #include "cluster_util.h" #include +#include #include "PreClusterTimingGraphResolver.h" #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" -#include "cluster_router.h" +#include "cluster_legalizer.h" #include "cluster_placement.h" +#include "clustered_netlist.h" #include "concrete_timing_info.h" #include "output_clustering.h" #include "prepack.h" @@ -19,48 +21,8 @@ /* Global variables in clustering */ /**********************************/ -/* TODO: May want to check that all atom blocks are actually reached */ -static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { - int i, j; - const t_pb_type* pb_type; - bool has_child = false; - auto& atom_ctx = g_vpr_ctx.atom(); - - pb_type = pb->pb_graph_node->pb_type; - if (pb_type->num_modes == 0) { - /* primitive */ - auto blk_id = atom_ctx.lookup.pb_atom(pb); - if (blk_id) { - if (blocks_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains 
atom block %s but atom block is already contained in another pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - blocks_checked.insert(blk_id); - if (pb != atom_ctx.lookup.atom_pb(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s contains atom block %s but atom block does not link to pb.\n", - pb->name, atom_ctx.nlist.block_name(blk_id).c_str()); - } - } - } else { - /* this is a container pb, all container pbs must contain children */ - for (i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs[i] != nullptr) { - if (pb->child_pbs[i][j].name != nullptr) { - has_child = true; - check_cluster_atom_blocks(&pb->child_pbs[i][j], blocks_checked); - } - } - } - } - VTR_ASSERT(has_child); - } -} - /*Print the contents of each cluster to an echo file*/ -static void echo_clusters(char* filename) { +static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legalizer) { FILE* fp; fp = vtr::fopen(filename, "w"); @@ -70,22 +32,21 @@ static void echo_clusters(char* filename) { fprintf(fp, "\n"); auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - std::map> cluster_atoms; + std::map> cluster_atoms; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_atoms.insert({blk_id, std::vector()}); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + cluster_atoms.insert({cluster_id, std::vector()}); } for (auto atom_blk_id : atom_ctx.nlist.blocks()) { - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId cluster_id = cluster_legalizer.get_atom_cluster(atom_blk_id); - cluster_atoms[clb_index].push_back(atom_blk_id); + cluster_atoms[cluster_id].push_back(atom_blk_id); } for (auto& cluster_atom : cluster_atoms) { - const std::string& cluster_name = cluster_ctx.clb_nlist.block_name(cluster_atom.first); + const std::string& cluster_name = 
cluster_legalizer.get_cluster_pb(cluster_atom.first)->name; fprintf(fp, "Cluster %s Id: %zu \n", cluster_name.c_str(), size_t(cluster_atom.first)); fprintf(fp, "\tAtoms in cluster: \n"); @@ -98,12 +59,11 @@ static void echo_clusters(char* filename) { } fprintf(fp, "\nCluster Floorplanning Constraints:\n"); - const auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); - for (ClusterBlockId clb_id : cluster_ctx.clb_nlist.blocks()) { - const std::vector& regions = floorplanning_ctx.cluster_constraints[clb_id].get_regions(); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + const std::vector& regions = cluster_legalizer.get_cluster_pr(cluster_id).get_regions(); if (!regions.empty()) { - fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(clb_id)); + fprintf(fp, "\nRegions in Cluster %zu:\n", size_t(cluster_id)); for (const auto& region : regions) { print_region(fp, region); } @@ -113,81 +73,13 @@ static void echo_clusters(char* filename) { fclose(fp); } -/* TODO: Add more error checking! 
*/ -void check_clustering() { - std::unordered_set atoms_checked; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - if (cluster_ctx.clb_nlist.blocks().size() == 0) { - VTR_LOG_WARN("Packing produced no clustered blocks"); - } - - /* - * Check that each atom block connects to one physical primitive and that the primitive links up to the parent clb - */ - for (auto blk_id : atom_ctx.nlist.blocks()) { - //Each atom should be part of a pb - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - if (!atom_pb) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s is not mapped to a pb\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - //Check the reverse mapping is consistent - if (atom_ctx.lookup.pb_atom(atom_pb) != blk_id) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "pb %s does not contain atom block %s but atom block %s maps to pb.\n", - atom_pb->name, - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - VTR_ASSERT(atom_ctx.nlist.block_name(blk_id) == atom_pb->name); - - const t_pb* cur_pb = atom_pb; - while (cur_pb->parent_pb) { - cur_pb = cur_pb->parent_pb; - VTR_ASSERT(cur_pb->name); - } - - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(blk_id); - if (clb_index == ClusterBlockId::INVALID()) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom %s is not mapped to a CLB\n", - atom_ctx.nlist.block_name(blk_id).c_str()); - } - - if (cur_pb != cluster_ctx.clb_nlist.block_pb(clb_index)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "CLB %s does not match CLB contained by pb %s.\n", - cur_pb->name, atom_pb->name); - } - } - - /* Check that I do not have spurious links in children pbs */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - check_cluster_atom_blocks(cluster_ctx.clb_nlist.block_pb(blk_id), atoms_checked); - } - - for (auto blk_id : atom_ctx.nlist.blocks()) { - if (!atoms_checked.count(blk_id)) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Atom block %s not found in any cluster.\n", - 
atom_ctx.nlist.block_name(blk_id).c_str()); - } - } -} - -//calculate the initial timing at the start of packing stage void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, std::shared_ptr& clustering_delay_calc, std::shared_ptr& timing_info, vtr::vector& atom_criticality) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* * Initialize the timing analyzer @@ -233,94 +125,35 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, } } -//Free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - free_intra_lb_nets(clustering_data.intra_lb_routing[blk_id]); - - clustering_data.intra_lb_routing.clear(); if (packer_opts.hill_climbing_flag) delete[] clustering_data.hill_climbing_inputs_avail; - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - cluster_ctx.clb_nlist.remove_block(blk_id); - - cluster_ctx.clb_nlist = ClusteredNetlist(); - delete[] clustering_data.unclustered_list_head; delete[] clustering_data.memory_pool; } -//check the clustering and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - VTR_ASSERT(num_clb == (int)cluster_ctx.clb_nlist.blocks().size()); - check_clustering(); + const t_arch* arch) { + cluster_legalizer.verify(); if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERS)) { - echo_clusters(getEchoFileName(E_ECHO_CLUSTERS)); - } - - output_clustering(intra_lb_routing, packer_opts.global_clocks, is_clock, 
arch->architecture_id, packer_opts.output_file.c_str(), false); - - VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); -} - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - int cur_cluster_size, cur_pb_depth; - - for (const auto& type : device_ctx.logical_block_types) { - if (is_empty_type(&type)) - continue; - - cur_cluster_size = get_max_primitives_in_pb_type(type.pb_type); - cur_pb_depth = get_max_depth_of_pb_type(type.pb_type); - if (cur_cluster_size > max_cluster_size) { - max_cluster_size = cur_cluster_size; - } - if (cur_pb_depth > max_pb_depth) { - max_pb_depth = cur_pb_depth; - } + echo_clusters(getEchoFileName(E_ECHO_CLUSTERS), cluster_legalizer); } -} -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data) { - bool is_cluster_legal; - - if (detailed_routing_stage == (int)E_DETAILED_ROUTE_AT_END_ONLY) { - /* is_mode_conflict does not affect this stage. It is needed when trying to route the packed clusters. - * - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. 
- * If the value is TRUE the cluster has to be repacked, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected - */ - t_mode_selection_status mode_status; - is_cluster_legal = try_intra_lb_route(router_data, verbosity, &mode_status); - if (is_cluster_legal) { - VTR_LOGV(verbosity > 2, "\tPassed route at end.\n"); - } else { - VTR_LOGV(verbosity > 0, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); - } - } else { - is_cluster_legal = true; - } - return is_cluster_legal; + output_clustering(&cluster_legalizer, + packer_opts.global_clocks, + is_clock, + arch->architecture_id, + packer_opts.output_file.c_str(), + false, /*skip_clustering*/ + true /*from_legalizer*/); } -/*print the header for the clustering progress table*/ void print_pack_status_header() { VTR_LOG("Starting Clustering - Clustering Progress: \n"); VTR_LOG("------------------- -------------------------- ---------\n"); @@ -328,14 +161,14 @@ void print_pack_status_header() { VTR_LOG("------------------- -------------------------- ---------\n"); } -/*incrementally print progress updates during clustering*/ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups) { + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { //Print a packing update each time another 4% of molecules have been packed. const float print_frequency = 0.04; @@ -361,18 +194,13 @@ void print_pack_status(int num_clb, fflush(stdout); mols_since_last_print = 0; if (attraction_groups.num_attraction_groups() > 0) { - rebuild_attraction_groups(attraction_groups); + rebuild_attraction_groups(attraction_groups, cluster_legalizer); } } } -/* - * Periodically rebuild the attraction groups to reflect which atoms in them - * are still available for new clusters (i.e. 
remove the atoms that have already - * been packed from the attraction group). - */ -void rebuild_attraction_groups(AttractionInfo& attraction_groups) { - auto& atom_ctx = g_vpr_ctx.atom(); +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer) { for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { AttractGroupId group_id(igroup); @@ -380,8 +208,7 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { AttractionGroup new_att_group_info; for (AtomBlockId atom : group.group_atoms) { - //If the ClusterBlockId is anything other than invalid, the atom has been packed already - if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(atom)) { new_att_group_info.group_atoms.push_back(atom); } } @@ -390,9 +217,8 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups) { } } -/* Determine if atom block is in pb */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); while (cur_pb) { @@ -404,9 +230,6 @@ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { return false; } -/* Remove blk from list of feasible blocks sorted according to gain - * Useful for removing blocks that are repeatedly failing. 
If a block - * has been found to be illegal, we don't repeatedly consider it.*/ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb) { int molecule_index; @@ -432,7 +255,6 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, pb->pb_stats->num_feasible_blocks--; } -/* Add blk to list of feasible blocks sorted according to gain */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, @@ -502,8 +324,6 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, /*****************************************/ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, @@ -560,160 +380,14 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, } } } - - /* alloc and load cluster placement info */ - *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); - - /* alloc array that will store primitives that a molecule gets placed to, - * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list - * this array must be the size of the biggest molecule - */ - size_t max_molecule_size = prepacker.get_max_molecule_size(); - *primitives_list = new t_pb_graph_node*[max_molecule_size]; - for (size_t i = 0; i < max_molecule_size; i++) - (*primitives_list)[i] = nullptr; } /*****************************************/ -void free_pb_stats_recursive(t_pb* pb) { - int i, j; - /* Releases all the memory used by clustering data structures. 
*/ - if (pb) { - if (pb->pb_graph_node != nullptr) { - if (!pb->pb_graph_node->is_primitive()) { - for (i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs && pb->child_pbs[i]) { - free_pb_stats_recursive(&pb->child_pbs[i][j]); - } - } - } - } - } - free_pb_stats(pb); - } -} - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { - const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; - - VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ - - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { - /* This pb already has a different logical block */ - return false; - } - - if (cur_pb_type->class_type == MEMORY_CLASS) { - /* Memory class has additional feasibility requirements: - * - all siblings must share all nets, including open nets, with the exception of data nets */ - - /* find sibling if one exists */ - AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); - - if (sibling_memory_blk_id) { - //There is a sibling, see if the current block is feasible with it - bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); - if (!sibling_feasible) { - return false; - } - } - } - - //Generic feasibility check - return primitive_type_feasible(blk_id, cur_pb_type); -} - -bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { - /* Check that the two atom blocks blk_id and sibling_blk_id (which should both be memory slices) - * are feasible, in the sence that they have precicely the same net connections (with the - * exception of nets in data port classes). 
- * - * Note that this routine does not check pin feasibility against the cur_pb_type; so - * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. - */ - auto& atom_ctx = g_vpr_ctx.atom(); - VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); - - //First, identify the 'data' ports by looking at the cur_pb_type - std::unordered_set data_ports; - for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { - const char* port_class = cur_pb_type->ports[iport].port_class; - if (port_class && strstr(port_class, "data") == port_class) { - //The port_class starts with "data", so it is a data port - - //Record the port - data_ports.insert(cur_pb_type->ports[iport].model_port); - } - } - - //Now verify that all nets (except those connected to data ports) are equivalent - //between blk_id and sibling_blk_id - - //Since the atom netlist stores only in-use ports, we iterate over the model to ensure - //all ports are compared - const t_model* model = cur_pb_type->model; - for (t_model_ports* port : {model->inputs, model->outputs}) { - for (; port; port = port->next) { - if (data_ports.count(port)) { - //Don't check data ports - continue; - } - - //Note: VPR doesn't support multi-driven nets, so all outputs - //should be data ports, otherwise the siblings will both be - //driving the output net - - //Get the ports from each primitive - auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); - auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); - - //Check that all nets (including unconnected nets) match - for (int ipin = 0; ipin < port->size; ++ipin) { - //The nets are initialized as invalid (i.e. 
disconnected) - AtomNetId blk_net_id; - AtomNetId sib_net_id; - - //We can get the actual net provided the port exists - // - //Note that if the port did not exist, the net is left - //as invalid/disconneced - if (blk_port_id) { - blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); - } - if (sib_port_id) { - sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); - } - - //The sibling and block must have the same (possibly disconnected) - //net on this pin - if (blk_net_id != sib_net_id) { - //Nets do not match, not feasible - return false; - } - } - } - } - - return true; -} -/*****************************************/ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head) { - /* This routine returns an atom block which has not been clustered, has * - * no connection to the current cluster, satisfies the cluster * - * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, - * and has ext_inps external inputs. If * - * there is no such atom block it returns ClusterBlockId::INVALID(). Remove_flag * - * controls whether or not blocks that have already been clustered * - * are removed from the unclustered_list data structures. NB: * - * to get a atom block regardless of clock constraints just set clocks_ * - * avail > 0. */ t_molecule_link *ptr, *prev_ptr; int i; @@ -759,12 +433,7 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size) { - /* This routine is used to find new blocks for clustering when there are no feasible * - * blocks with any attraction to the current cluster (i.e. it finds * - * blocks which are unconnected from the current cluster). 
It returns * - * the atom block with the largest number of used inputs that satisfies the * - * clocking and number of inputs constraints. If no suitable atom block is * - * found, the routine returns ClusterBlockId::INVALID(). + /* * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count */ @@ -790,684 +459,30 @@ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb } /*****************************************/ -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { - /* Call this routine when starting to fill up a new cluster. It resets * - * the gain vector, etc. */ - - pb->pb_stats = new t_pb_stats; - - /* If statement below is for speed. If nets are reasonably low-fanout, * - * only a relatively small number of blocks will be marked, and updating * - * only those atom block structures will be fastest. If almost all blocks * - * have been touched it should be faster to just run through them all * - * in order (less addressing and better cache locality). 
*/ - pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->num_feasible_blocks = NOT_VALID; - pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; - - for (int i = 0; i < feasible_block_array_size; i++) - pb->pb_stats->feasible_blocks[i] = nullptr; - - pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); - - pb->pb_stats->pulled_from_atom_groups = 0; - pb->pb_stats->num_att_group_atoms_used = 0; - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->transitive_fanout_candidates.clear(); - - pb->pb_stats->num_pins_of_net_in_pb.clear(); - - pb->pb_stats->num_child_blocks_in_pb = 0; - - pb->pb_stats->explore_transitive_fanout = true; -} -/*****************************************/ - -/** - * Cleans up a pb after unsuccessful molecule packing - * - * Recursively frees pbs from a t_pb tree. The given root pb itself is not - * deleted. - * - * If a pb object has its children allocated then before freeing them the - * function checks if there is no atom that corresponds to any of them. The - * check is performed only for leaf (primitive) pbs. The function recurses for - * non-primitive pbs. - * - * The cleaning itself includes deleting all child pbs, resetting mode of the - * pb and also freeing its name. This prepares the pb for another round of - * molecule packing tryout. 
- */ -bool cleanup_pb(t_pb* pb) { - bool can_free = true; - - /* Recursively check if there are any children with already assigned atoms */ - if (pb->child_pbs != nullptr) { - const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; - VTR_ASSERT(mode != nullptr); - - /* Check each mode */ - for (int i = 0; i < mode->num_pb_type_children; ++i) { - /* Check each child */ - if (pb->child_pbs[i] != nullptr) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { - t_pb* pb_child = &pb->child_pbs[i][j]; - t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; - - /* Primitive, check occupancy */ - if (pb_type->num_modes == 0) { - if (pb_child->name != nullptr) { - can_free = false; - } - } - - /* Non-primitive, recurse */ - else { - if (!cleanup_pb(pb_child)) { - can_free = false; - } - } - } - } - } - - /* Free if can */ - if (can_free) { - for (int i = 0; i < mode->num_pb_type_children; ++i) { - if (pb->child_pbs[i] != nullptr) { - delete[] pb->child_pbs[i]; - } - } - - delete[] pb->child_pbs; - pb->child_pbs = nullptr; - pb->mode = 0; - - if (pb->name) { - free(pb->name); - pb->name = nullptr; - } - } - } - - return can_free; -} - -/** - * Performs legality checks to see whether the selected molecule can be - * packed into the current cluster. The legality checks are related to - * floorplanning, pin feasibility, and routing (if detailed route - * checking is enabled). The routine returns BLK_PASSED if the molecule - * can be packed in the cluster. If the block passes, the routine commits - * it to the current cluster and updates the appropriate data structures. - * Otherwise, it returns the appropriate failed pack status based on which - * legality check the molecule failed. 
- */ -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site) { - t_pb* parent; - t_pb* cur_pb; - - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - parent = nullptr; - - const int molecule_size = get_array_size_of_molecule(molecule); - - if (verbosity > 3) { - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - VTR_LOG("\t\tTry pack molecule: '%s' (%s)", - atom_ctx.nlist.block_name(root_atom).c_str(), - atom_ctx.nlist.block_model(root_atom)->name); - VTR_LOGV(molecule->pack_pattern, - " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, - molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - // if this cluster has a molecule placed in it that is part of a long chain - // (a chain that consists of more than one molecule), don't allow more long chain - // molecules to be placed in this cluster. To avoid possibly creating cluster level - // blocks that have incompatible placement constraints or form very long placement - // macros that limit placement flexibility. 
- if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - bool cluster_pr_update_check = false; - - //check if every atom in the molecule is legal in the cluster from a floorplanning perspective - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - //try to intersect with atom PartitionRegion if atom exists - if (molecule->atom_block_ids[i_mol]) { - bool cluster_pr_needs_update = false; - bool block_pack_floorplan_status = atom_cluster_floorplanning_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_cluster_pr, - cluster_pr_needs_update); - - if (!block_pack_floorplan_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_FLOORPLANNING; - } - - if (cluster_pr_needs_update) { - cluster_pr_update_check = true; - } - } - } - - // check if all atoms in the molecule can be added to the cluster without NoC group conflicts - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - if (molecule->atom_block_ids[i_mol]) { - bool block_pack_noc_grp_status = atom_cluster_noc_group_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_noc_grp_id); - - if (!block_pack_noc_grp_status) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return e_block_pack_status::BLK_FAILED_NOC_GROUP; - } - } - } - - e_block_pack_status block_pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; - - while (block_pack_status != e_block_pack_status::BLK_PASSED) { - if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, - primitives_list, force_site)) { - 
block_pack_status = e_block_pack_status::BLK_PASSED; - - int failed_location = 0; - - for (int i_mol = 0; i_mol < molecule_size && block_pack_status == e_block_pack_status::BLK_PASSED; i_mol++) { - VTR_ASSERT((primitives_list[i_mol] == nullptr) == (!molecule->atom_block_ids[i_mol])); - failed_location = i_mol + 1; - // try place atom block if it exists - if (molecule->atom_block_ids[i_mol]) { - block_pack_status = try_place_atom_block_rec(primitives_list[i_mol], - molecule->atom_block_ids[i_mol], pb, &parent, - max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - } - } - - if (enable_pin_feasibility_filter && block_pack_status == e_block_pack_status::BLK_PASSED) { - /* Check if pin usage is feasible for the current packing assignment */ - reset_lookahead_pins_used(pb); - try_update_lookahead_pins_used(pb); - if (!check_lookahead_pins_used(pb, max_external_pin_util)) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - if (block_pack_status == e_block_pack_status::BLK_PASSED) { - /* - * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster - * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). - * depending on its value we have different behaviors: - * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. - * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. If the clusterer arrives at this stage, - * it means that more checks have to be performed as the previous stage failed to generate a new cluster. - * - * mode_status is a data structure containing the status of the mode selection. 
Its members are: - * - bool is_mode_conflict - * - bool try_expand_all_modes - * - bool expand_all_modes - * - * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. - * - * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted - * an error will be thrown during mode conflicts checks (this to prevent infinite loops). - * - * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected. - * - * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. - * - * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route - * by using all the modes during node expansion. - * - * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. 
- */ - t_mode_selection_status mode_status; - bool is_routed = false; - bool do_detailed_routing_stage = detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM; - if (do_detailed_routing_stage) { - do { - reset_intra_lb_route(router_data); - is_routed = try_intra_lb_route(router_data, verbosity, &mode_status); - } while (do_detailed_routing_stage && mode_status.is_mode_issue()); - } - - if (do_detailed_routing_stage && !is_routed) { - /* Cannot pack */ - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Detailed Routing Legality\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_ROUTE; - } else { - /* Pack successful, commit - * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside - */ - VTR_ASSERT(block_pack_status == e_block_pack_status::BLK_PASSED); - if (molecule->is_chain()) { - /* Chained molecules often take up lots of area and are important, - * if a chain is packed in, want to rename logic block to match chain name */ - AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; - cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - while (cur_pb != nullptr) { - free(cur_pb->name); - cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - cur_pb = cur_pb->parent_pb; - } - // if this molecule is part of a chain, mark the cluster as having a long chain - // molecule. Also check if it's the first molecule in the chain to be packed. 
- // If so, update the chain id for this chain of molecules to make sure all - // molecules will be packed to the same chain id and can reach each other using - // the chain direct links between clusters - if (molecule->chain_info->is_long_chain) { - cluster_placement_stats_ptr->has_long_chain = true; - if (molecule->chain_info->chain_id == -1) { - update_molecule_chain_info(molecule, primitives_list[molecule->root]); - } - } - } - - //update cluster PartitionRegion if atom with floorplanning constraints was added - if (cluster_pr_update_check) { - floorplanning_ctx.cluster_constraints[clb_index] = temp_cluster_pr; - VTR_LOGV(verbosity > 2, "\nUpdated PartitionRegion of cluster %d\n", clb_index); - } - - for (int i = 0; i < molecule_size; i++) { - if (molecule->atom_block_ids[i]) { - /* invalidate all molecules that share atom block with current molecule */ - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(molecule->atom_block_ids[i]); - cur_molecule->valid = false; - - commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); - } - } - } - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i]); - } - } - for (int i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - revert_place_atom_block(molecule->atom_block_ids[i], router_data); - } - } - - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - - /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. - * Before trying to pack next molecule the unused pbs need to be freed and, the most important, - * their modes reset. This task is performed by the cleanup_pb() function below. 
*/ - cleanup_pb(pb); - - } else { - VTR_LOGV(verbosity > 3, "\t\tPASSED pack molecule\n"); - } - } else { - VTR_LOGV(verbosity > 3, "\t\tFAILED No candidate primitives available\n"); - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - break; /* no more candidate primitives available, this molecule will not pack, return fail */ - } - } - return block_pack_status; -} - -/* Record the failure of the molecule in this cluster in the current pb stats. - * If a molecule fails repeatedly, it's gain will be penalized if packing with - * attraction groups on. */ -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { - //Only have to record the failure for the first atom in the molecule. - //The convention when checking if a molecule has failed to pack in the cluster - //is to check whether the first atoms has been recorded as having failed - - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); - } else { - got->second++; - } -} - -/** - * Try place atom block into current primitive location - */ - -enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size) { - int i, j; - bool is_primitive; - enum e_block_pack_status block_pack_status; - - t_pb* my_parent; - t_pb *pb, *parent_pb; - const t_pb_type* pb_type; - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - my_parent = nullptr; - - block_pack_status = e_block_pack_status::BLK_PASSED; - - /* Discover parent */ - if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { - block_pack_status = 
try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, - &my_parent, max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - parent_pb = my_parent; - } else { - parent_pb = cb; - } - - /* Create siblings if siblings are not allocated */ - if (parent_pb->child_pbs == nullptr) { - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), parent_pb); - - VTR_ASSERT(parent_pb->name == nullptr); - parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; - set_reset_pb_modes(router_data, parent_pb, true); - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; - - for (i = 0; i < mode->num_pb_type_children; i++) { - parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; - - for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { - parent_pb->child_pbs[i][j].parent_pb = parent_pb; - - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), &parent_pb->child_pbs[i][j]); - - parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); - } - } - } else { - /* if this is not the first child of this parent, must match existing parent mode */ - if (parent_pb->mode != pb_graph_node->pb_type->parent_mode->index) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - } - - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - for (i = 0; i < mode->num_pb_type_children; i++) { - if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { - break; - } - } - VTR_ASSERT(i < mode->num_pb_type_children); - pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; - *parent = pb; /* this pb is parent of it's child that called this function */ - VTR_ASSERT(pb->pb_graph_node == pb_graph_node); - if (pb->pb_stats == nullptr) 
{ - alloc_and_load_pb_stats(pb, feasible_block_array_size); - } - pb_type = pb_graph_node->pb_type; - - /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping - * Early exit to flag failure - */ - if (true == pb_type->parent_mode->disable_packing) { - return e_block_pack_status::BLK_FAILED_FEASIBLE; - } - is_primitive = (pb_type->num_modes == 0); - - if (is_primitive) { - VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) - && atom_ctx.lookup.atom_pb(blk_id) == nullptr - && atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()); - /* try pack to location */ - VTR_ASSERT(pb->name == nullptr); - pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - - //Update the atom netlist mappings - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - atom_ctx.lookup.set_atom_pb(blk_id, pb); - - add_atom_as_target(router_data, blk_id); - if (!primitive_feasible(blk_id, pb)) { - /* failed location feasibility check, revert pack */ - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - - // if this block passed and is part of a chained molecule - if (block_pack_status == e_block_pack_status::BLK_PASSED && molecule->is_chain()) { - auto molecule_root_block = molecule->atom_block_ids[molecule->root]; - // if this is the root block of the chain molecule check its placmeent feasibility - if (blk_id == molecule_root_block) { - block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); - } - } - - VTR_LOGV(verbosity > 4 && block_pack_status == e_block_pack_status::BLK_PASSED, - "\t\t\tPlaced atom '%s' (%s) at %s\n", - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_model(blk_id)->name, - pb->hierarchical_type_name().c_str()); - } - - if (block_pack_status != e_block_pack_status::BLK_PASSED) { - free(pb->name); - pb->name = nullptr; - } - return block_pack_status; -} - -/* - * Checks if the atom and cluster have compatible floorplanning constraints - * If the atom and 
cluster both have non-empty PartitionRegions, and the intersection - * of the PartitionRegions is empty, the atom cannot be packed in the cluster. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update) { - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*check if the atom can go in the cluster by checking if the atom and cluster have intersecting PartitionRegions*/ - - //get partition that atom belongs to - PartitionId partid; - partid = floorplanning_ctx.constraints.get_atom_partition(blk_id); - - //if the atom does not belong to a partition, it can be put in the cluster - //regardless of what the cluster's PartitionRegion is because it has no constraints - if (partid == PartitionId::INVALID()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has no floorplanning constraints, passed for cluster %d \n", - blk_id, clb_index); - cluster_pr_needs_update = false; - return true; - } else { - //get pr of that partition - const PartitionRegion& atom_pr = floorplanning_ctx.constraints.get_partition_pr(partid); - - //intersect it with the pr of the current cluster - PartitionRegion cluster_pr = floorplanning_ctx.cluster_constraints[clb_index]; - - if (cluster_pr.empty()) { - temp_cluster_pr = atom_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d has floorplanning constraints, passed cluster %d which has empty PR\n", - blk_id, clb_index); - return true; - } else { - //update cluster_pr with the intersection of the cluster's PartitionRegion - //and the atom's PartitionRegion - update_cluster_part_reg(cluster_pr, atom_pr); - } - - // At this point, cluster_pr is the intersection of atom_pr and the clusters current pr - if (cluster_pr.empty()) { - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d failed floorplanning check for cluster %d \n", - blk_id, clb_index); - 
cluster_pr_needs_update = false; - return false; - } else { - //update the cluster's PartitionRegion with the intersecting PartitionRegion - temp_cluster_pr = cluster_pr; - cluster_pr_needs_update = true; - VTR_LOGV(verbosity > 3, - "\t\t\t Intersect: Atom block %d passed cluster %d, cluster PR was updated with intersection result \n", - blk_id, clb_index); - return true; - } - } -} - -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id) { - const auto& atom_noc_grp_ids = g_vpr_ctx.cl_helper().atom_noc_grp_id; - const NocGroupId atom_noc_grp_id = atom_noc_grp_ids.empty() ? NocGroupId::INVALID() : atom_noc_grp_ids[blk_id]; - - if (temp_cluster_noc_grp_id == NocGroupId::INVALID()) { - // the cluster does not have a NoC group - // assign the atom's NoC group to cluster - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was updated with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - temp_cluster_noc_grp_id = atom_noc_grp_id; - return true; - } else if (temp_cluster_noc_grp_id == atom_noc_grp_id) { - // the cluster has the same NoC group ID as the atom, - // so they are compatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d passed cluster %d, cluster's NoC group was compatible with the atom's group %d\n", - blk_id, clb_index, (size_t)atom_noc_grp_id); - return true; - } else { - // the cluster belongs to a different NoC group than the atom's group, - // so they are incompatible - VTR_LOGV(verbosity > 3, - "\t\t\t NoC Group: Atom block %d failed NoC group check for cluster %d. 
Cluster's NoC group: %d, atom's NoC group: %d\n", - blk_id, clb_index, (size_t)temp_cluster_noc_grp_id, size_t(atom_noc_grp_id)); - return false; - } -} - -/* Revert trial atom block iblock and free up memory space accordingly - */ -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - //We cast away const here since we may free the pb, and it is - //being removed from the active mapping. - // - //In general most code works fine accessing cosnt t_pb*, - //which is why we store them as such in atom_ctx.lookup - t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); - - if (pb != nullptr) { - /* When freeing molecules, the current block might already have been freed by a prior revert - * When this happens, no need to do anything beyond basic book keeping at the atom block - */ - - t_pb* next = pb->parent_pb; - revalid_molecules(pb); - free_pb(pb); - pb = next; - - while (pb != nullptr) { - /* If this is pb is created only for the purposes of holding new molecule, remove it - * Must check if cluster is already freed (which can be the case) - */ - next = pb->parent_pb; - - if (pb->child_pbs != nullptr && pb->pb_stats != nullptr - && pb->pb_stats->num_child_blocks_in_pb == 0) { - set_reset_pb_modes(router_data, pb, false); - if (next != nullptr) { - /* If the code gets here, then that means that placing the initial seed molecule - * failed, don't free the actual complex block itself as the seed needs to find - * another placement */ - revalid_molecules(pb); - free_pb(pb); - } - } - pb = next; - } - } - - //Update the atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - atom_ctx.lookup.set_atom_pb(blk_id, nullptr); -} - -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { +void update_connection_gain_values(const AtomNetId 
net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { /*This function is called when the connectiongain values on the net net_id* *require updating. */ + const AtomContext& atom_ctx = g_vpr_ctx.atom(); int num_internal_connections, num_open_connections, num_stuck_connections; num_internal_connections = num_open_connections = num_stuck_connections = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(clustered_blk_id); + LegalizationClusterId legalization_cluster_id = cluster_legalizer.get_atom_cluster(clustered_blk_id); /* may wish to speed things up by ignoring clock nets since they are high fanout */ for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == clb_index + if (cluster_legalizer.get_atom_cluster(blk_id) == legalization_cluster_id && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { num_internal_connections++; - } else if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + } else if (!cluster_legalizer.is_atom_clustered(blk_id)) { num_open_connections++; } else { num_stuck_connections++; @@ -1479,7 +494,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto blk_id = atom_ctx.nlist.pin_block(pin_id); VTR_ASSERT(blk_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { /* TODO: Gain function accurate only if net has one connection to block, * TODO: Should we handle case where net has multi-connection to block? 
* Gain computation is only off by a bit in this case */ @@ -1502,7 +517,7 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { cur_pb->pb_stats->connectiongain[blk_id] = 0; } @@ -1514,53 +529,33 @@ void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clu } } -void try_fill_cluster(const t_packer_opts& packer_opts, +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - 
- block_pack_status = try_pack_molecule(cur_cluster_placement_stats_ptr, - next_molecule, - primitives_list, - cluster_ctx.clb_nlist.block_pb(clb_index), - num_models, - max_cluster_size, - clb_index, - detailed_routing_stage, - router_data, - packer_opts.pack_verbosity, - packer_opts.enable_pin_feasibility_filter, - packer_opts.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_noc_grp_id); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); + + block_pack_status = cluster_legalizer.add_mol_to_cluster(next_molecule, + legalization_cluster_id); auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; VTR_ASSERT(blk_id); @@ -1588,7 +583,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1596,8 +591,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, packer_opts.pack_verbosity, + legalization_cluster_id, + packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, primitive_candidate_block_types); @@ -1625,9 +623,11 @@ void try_fill_cluster(const t_packer_opts& packer_opts, cluster_stats.mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height(), - attraction_groups); + attraction_groups, + cluster_legalizer); - update_cluster_stats(next_molecule, clb_index, + update_cluster_stats(next_molecule, + cluster_legalizer, is_clock, //Set of all clocks is_global, //Set of all global signals (currently clocks) packer_opts.global_clocks, packer_opts.alpha, 
packer_opts.beta, packer_opts.timing_driven, @@ -1641,7 +641,7 @@ void try_fill_cluster(const t_packer_opts& packer_opts, if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -1649,8 +649,10 @@ void try_fill_cluster(const t_packer_opts& packer_opts, packer_opts.feasible_block_array_size, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, clb_inter_blk_nets, - clb_index, + legalization_cluster_id, packer_opts.pack_verbosity, unclustered_list_head, unclustered_list_head_size, @@ -1661,78 +663,37 @@ void try_fill_cluster(const t_packer_opts& packer_opts, } } -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data) { - t_pack_molecule* next_seed = nullptr; - - intra_lb_routing.push_back(router_data->saved_lb_nets); - VTR_ASSERT((int)intra_lb_routing.size() == num_clb); - router_data->saved_lb_nets = nullptr; - - //Pick a new seed - next_seed = get_highest_gain_seed_molecule(seedindex, seed_atoms); - - if (packer_opts.timing_driven) { - if (num_blocks_hill_added > 0) { - cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; - } - } - return next_seed; -} - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const 
LegalizationClusterId legalization_cluster_id, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* store info that will be used later in packing from pb_stats and free the rest */ - t_pb_stats* pb_stats = cluster_ctx.clb_nlist.block_pb(clb_index)->pb_stats; + t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); + t_pb_stats* pb_stats = cur_pb->pb_stats; for (const AtomNetId mnet_id : pb_stats->marked_nets) { int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[clb_index].push_back(mnet_id); + clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); } } - auto cur_pb = cluster_ctx.clb_nlist.block_pb(clb_index); // update the data structure holding the LE counts update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); //print clustering progress incrementally //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - free_pb_stats_recursive(cur_pb); -} - -/* Free up data structures and requeue used molecules */ -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; - 
revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index)); - cluster_ctx.clb_nlist.remove_block(clb_index); - cluster_ctx.clb_nlist.compress(); - num_clb--; - seedindex = savedseedindex; } /*****************************************/ void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, @@ -1741,7 +702,7 @@ void update_timing_gain_values(const AtomNetId net_id, *net_id requires updating. */ float timinggain; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); /* Check if this atom net lists its driving atom block twice. If so, avoid * * double counting this atom block by skipping the first (driving) pin. */ @@ -1753,7 +714,7 @@ void update_timing_gain_values(const AtomNetId net_id, && !is_global.count(net_id)) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { @@ -1772,7 +733,7 @@ void update_timing_gain_values(const AtomNetId net_id, auto driver_pin = atom_ctx.nlist.net_driver(net_id); auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); - if (atom_ctx.lookup.atom_clb(new_blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(new_blk_id)) { for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { timinggain = timing_info.setup_pin_criticality(pin_id); @@ -1790,6 +751,7 @@ void update_timing_gain_values(const AtomNetId net_id, void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum 
e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -1797,15 +759,8 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const std::unordered_set& is_global, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input) { - /* Updates the marked data structures, and if gain_flag is GAIN, * - * the gain when an atom block is added to a cluster. The * - * sharinggain is the number of inputs that a atom block shares with * - * blocks that are already in the cluster. Hillgain is the * - * reduction in number of pins-required by adding a atom block to the * - * cluster. The timinggain is the criticality of the most critical* - * atom net between this atom block and an atom block in the cluster. */ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; cur_pb = get_top_level_pb(cur_pb); @@ -1845,7 +800,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { for (auto pin_id : pins) { auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { cur_pb->pb_stats->marked_blocks.push_back(blk_id); cur_pb->pb_stats->sharinggain[blk_id] = 1; @@ -1860,11 +815,12 @@ void mark_and_update_partial_gain(const AtomNetId net_id, if (connection_driven) { update_connection_gain_values(net_id, clustered_blk_id, cur_pb, + cluster_legalizer, net_relation_to_clustered_block); } if (timing_driven) { - update_timing_gain_values(net_id, cur_pb, + update_timing_gain_values(net_id, cur_pb, cluster_legalizer, net_relation_to_clustered_block, timing_info, is_global, @@ -1879,10 +835,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id, /*****************************************/ void 
update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { - /*Updates the total gain array to reflect the desired tradeoff between* - *input sharing (sharinggain) and path_length minimization (timinggain) - *input each time a new molecule is added to the cluster.*/ - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); t_pb* cur_pb = pb; cur_pb = get_top_level_pb(cur_pb); @@ -1938,7 +891,7 @@ void update_total_gain(float alpha, float beta, bool timing_driven, bool connect /*****************************************/ void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -1950,16 +903,12 @@ void update_cluster_stats(const t_pack_molecule* molecule, const SetupTimingInfo& timing_info, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input) { - /* Routine that is called each time a new molecule is added to the cluster. - * Makes calls to update cluster stats such as the gain map for atoms, used pins, and clock structures, - * in order to reflect the new content of the cluster. - * Also keeps track of which attraction group the cluster belongs to. 
*/ int molecule_size; int iblock; t_pb *cur_pb, *cb; - auto& atom_ctx = g_vpr_ctx.mutable_atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); molecule_size = get_array_size_of_molecule(molecule); cb = nullptr; @@ -1969,9 +918,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, continue; } - //Update atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); VTR_ASSERT(atom_pb); @@ -1986,7 +932,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, cb = cur_pb; } cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - cur_pb->pb_stats->num_child_blocks_in_pb++; if (atom_grp_id != AttractGroupId::INVALID()) { /* TODO: Allow clusters to have more than one attraction group. */ @@ -2000,7 +945,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (!is_clock.count(net_id) || !global_clocks) { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2008,7 +953,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, OUTPUT, timing_info, @@ -2021,7 +966,7 @@ void update_cluster_stats(const t_pack_molecule* molecule, /* Next Inputs */ for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, @@ -2034,14 +979,14 @@ void update_cluster_stats(const t_pack_molecule* 
molecule, for (auto pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { auto net_id = atom_ctx.nlist.pin_net(pin_id); if (global_clocks) { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, high_fanout_net_threshold, net_output_feeds_driving_block_input); } else { - mark_and_update_partial_gain(net_id, GAIN, blk_id, + mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, timing_driven, connection_driven, INPUT, timing_info, is_global, @@ -2052,8 +997,6 @@ void update_cluster_stats(const t_pack_molecule* molecule, update_total_gain(alpha, beta, timing_driven, connection_driven, atom_pb->parent_pb, attraction_groups); - - commit_lookahead_pins_used(cb); } // if this molecule came from the transitive fanout candidates remove it @@ -2063,38 +1006,20 @@ void update_cluster_stats(const t_pack_molecule* molecule, } } -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id) { - /* Given a starting seed block, start_new_cluster determines the next cluster type to use - * It expands the FPGA if it cannot find a legal cluster for the atom 
block - */ + bool balance_block_type_utilization) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device(); + const DeviceContext& device_ctx = g_vpr_ctx.mutable_device(); /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; @@ -2136,57 +1061,24 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, //Try packing into each candidate type bool success = false; + t_logical_block_type_ptr block_type; + LegalizationClusterId new_cluster_id; for (auto type : candidate_types) { - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); - //Try packing into each mode e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { - pb->mode = j; - - reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(pb->pb_graph_node, j); - - //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, - //which allows all cluster pins to be used. This ensures that if we have a large - //molecule which would otherwise exceed the external pin utilization targets it - //can use the full set of cluster pins when selected as the seed block -- ensuring - //it is still implementable. 
- pack_result = try_pack_molecule(&cluster_placement_stats[type->index], - molecule, primitives_list, pb, - num_models, max_cluster_size, clb_index, - detailed_routing_stage, *router_data, - verbosity, - enable_pin_feasibility_filter, - feasible_block_array_size, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_noc_grp_id); - + std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(molecule, type, j); success = (pack_result == e_block_pack_status::BLK_PASSED); } if (success) { VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - pb->name = vtr::strdup(root_atom_name.c_str()); - clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); + // If clustering succeeds return the new_cluster_id and type. + legalization_cluster_id = new_cluster_id; + block_type = type; break; } else { VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name); - //Free failed clustering and try again - free_router_data(*router_data); - free_pb(pb); - delete pb; - *router_data = nullptr; } } @@ -2209,7 +1101,6 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, VTR_ASSERT(success); //Successfully create cluster - auto block_type = clb_nlist->block_type(clb_index); num_used_type_instances[block_type]++; /* Expand FPGA size if needed */ @@ -2220,24 +1111,18 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, } if (num_used_type_instances[block_type] > num_instances) { - device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); + mutable_device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); } } -/* - * Get candidate molecule to pack into currently open cluster - * Molecule selection priority: - * 1. 
Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - * 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - * 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, @@ -2254,37 +1139,72 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } if (prioritize_transitive_connectivity) { // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } } else { //Reverse order // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + feasible_block_array_size, + attraction_groups); } // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + clb_inter_blk_nets, + legalization_cluster_id, + transitive_fanout_threshold, + feasible_block_array_size, + attraction_groups); } } // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) if (cur_pb->pb_stats->num_feasible_blocks == 0) { - add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, attraction_groups, - feasible_block_array_size, cluster_index, primitive_candidate_block_types); + add_cluster_molecule_candidates_by_attraction_group(cur_pb, + cluster_placement_stats_ptr, + prepacker, + cluster_legalizer, + attraction_groups, + feasible_block_array_size, + legalization_cluster_id, + primitive_candidate_block_types); } /* Grab highest gain molecule */ t_pack_molecule* molecule = nullptr; @@ -2299,9 +1219,10 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, return molecule; } -/* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. 
*/ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); @@ -2309,13 +1230,11 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, cur_pb->pb_stats->num_feasible_blocks = 0; cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ - auto& atom_ctx = g_vpr_ctx.atom(); - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2325,32 +1244,33 @@ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, } } -/* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups) { /* Because the packer ignores high fanout nets when marking what blocks * to consider, use one of the ignored high fanout net to fill 
up lightly * related blocks */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; - auto& atom_ctx = g_vpr_ctx.atom(); - int count = 0; - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + for (auto pin_id : atom_nlist.net_pins(net_id)) { if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { break; } - AtomBlockId blk_id = atom_ctx.nlist.pin_block(pin_id); + AtomBlockId blk_id = atom_nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); @@ -2362,24 +1282,17 @@ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ } -/* - * If the current cluster being packed has an attraction group associated with it - * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules - * from the associated attraction group to the list of feasible blocks for the cluster. - * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency - * if the number of atoms in the group is greater than 500. 
Therefore, the molecules added to the candidates - * will vary each time you call this function. - */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId legalization_cluster_id, std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); + auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id); /* * For each cluster, we want to explore the attraction group molecules as potential @@ -2405,13 +1318,13 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); std::vector available_atoms; for (AtomBlockId atom_id : group.group_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { available_atoms.push_back(atom_id); } @@ -2426,17 +1339,17 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, if (num_available_atoms < 500) { //for (AtomBlockId atom_id : group.group_atoms) { for (AtomBlockId atom_id : available_atoms) { - const auto& atom_model = 
atom_ctx.nlist.block_model(atom_id); + const auto& atom_model = atom_nlist.block_model(atom_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(atom_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(atom_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id); if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr, cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2458,17 +1371,19 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, //AtomBlockId blk_id = group.group_atoms[selected_atom]; AtomBlockId blk_id = available_atoms[selected_atom]; - const auto& atom_model = atom_ctx.nlist.block_model(blk_id); + const auto& atom_model = atom_nlist.block_model(blk_id); auto itr = primitive_candidate_block_types.find(atom_model); VTR_ASSERT(itr != primitive_candidate_block_types.end()); std::vector& candidate_types = itr->second; //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() + if (!cluster_legalizer.is_atom_clustered(blk_id) && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); 
if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); @@ -2478,11 +1393,12 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, } } -/* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId legalization_cluster_id, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups) { @@ -2490,15 +1406,19 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, cur_pb->pb_stats->explore_transitive_fanout = false; /* First time finding transitive fanout candidates therefore alloc and load them */ - load_transitive_fanout_candidates(cluster_index, + load_transitive_fanout_candidates(legalization_cluster_id, cur_pb->pb_stats, + prepacker, + cluster_legalizer, clb_inter_blk_nets, transitive_fanout_threshold); /* Only consider candidates that pass a very simple legality check */ for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { t_pack_molecule* molecule = transitive_candidate.second; if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + bool success = check_free_primitives_for_molecule_atoms(molecule, + cluster_placement_stats_ptr, + cluster_legalizer); if (success) { 
add_molecule_to_pb_stats_candidates(molecule, cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); @@ -2507,14 +1427,14 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, } } -/*Check whether a free primitive exists for each atom block in the molecule*/ -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr) { - auto& atom_ctx = g_vpr_ctx.atom(); +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer) { bool success = true; for (int i_atom = 0; i_atom < get_array_size_of_molecule(molecule); i_atom++) { if (molecule->atom_block_ids[i_atom]) { - VTR_ASSERT(atom_ctx.lookup.atom_clb(molecule->atom_block_ids[i_atom]) == ClusterBlockId::INVALID()); + VTR_ASSERT(!cluster_legalizer.is_atom_clustered(molecule->atom_block_ids[i_atom])); auto blk_id2 = molecule->atom_block_ids[i_atom]; if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id2)) { /* TODO (Jason Luu): debating whether to check if placement exists for molecule @@ -2537,15 +1457,17 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId legalization_cluster_id, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types) { /* Finds the block with the greatest gain that satisfies the * input, clock and capacity constraints of a cluster that are - * passed in. 
If no suitable block is found it returns ClusterBlockId::INVALID(). + * passed in. If no suitable block is found it returns nullptr. */ VTR_ASSERT(cur_pb->is_root()); @@ -2553,8 +1475,9 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, /* If cannot pack into primitive, try packing into cluster */ auto best_molecule = get_highest_gain_molecule(cur_pb, attraction_groups, - NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, prioritize_transitive_connectivity, + NOT_HILL_CLIMBING, cluster_placement_stats_ptr, + prepacker, cluster_legalizer, clb_inter_blk_nets, + legalization_cluster_id, prioritize_transitive_connectivity, transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); /* If no blocks have any gain to the current cluster, the code above * @@ -2581,7 +1504,6 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, return best_molecule; } -//Calculates molecule statistics for a single molecule t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist) { t_molecule_stats molecule_stats; @@ -2652,14 +1574,15 @@ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const Atom std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; //Put all atoms in seed list - std::vector seed_atoms(atom_ctx.nlist.blocks().begin(), atom_ctx.nlist.blocks().end()); + std::vector seed_atoms(atom_nlist.blocks().begin(), atom_nlist.blocks().end()); //Initially all gains are zero - vtr::vector atom_gains(atom_ctx.nlist.blocks().size(), 0.); + vtr::vector atom_gains(atom_nlist.blocks().size(), 0.); if (seed_type == e_cluster_seed::TIMING) { VTR_ASSERT(atom_gains.size() == atom_criticality.size()); @@ -2669,21 +1592,21 @@ 
std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_INPUTS) { //By number of used molecule input pins - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); atom_gains[blk] = molecule_stats.num_used_ext_inputs; } } else if (seed_type == e_cluster_seed::BLEND) { //By blended gain (criticality and inputs used) - for (auto blk : atom_ctx.nlist.blocks()) { + for (auto blk : atom_nlist.blocks()) { /* Score seed gain of each block as a weighted sum of timing criticality, * number of tightly coupled blocks connected to it, and number of external inputs */ float seed_blend_fac = 0.5; - const t_pack_molecule* blk_mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_ctx.nlist); + const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_nlist); VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); float blend_gain = (seed_blend_fac * atom_criticality[blk] @@ -2695,9 +1618,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { //By pins per molecule (i.e. 
available pins on primitives, not pins in use) - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); int molecule_pins = 0; if (seed_type == e_cluster_seed::MAX_PINS) { @@ -2713,9 +1636,9 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, } } else if (seed_type == e_cluster_seed::BLEND2) { - for (auto blk : atom_ctx.nlist.blocks()) { - const t_pack_molecule* mol = atom_ctx.prepacker.get_atom_molecule(blk); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); + for (auto blk : atom_nlist.blocks()) { + const t_pack_molecule* mol = prepacker.get_atom_molecule(blk); + const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_nlist); float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); @@ -2773,17 +1696,18 @@ std::vector initialize_seed_atoms(const e_cluster_seed seed_type, return seed_atoms; } -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms) { - auto& atom_ctx = g_vpr_ctx.atom(); - +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer) { while (seed_index < static_cast(seed_atoms.size())) { AtomBlockId blk_id = seed_atoms[seed_index++]; // Check if the atom has already been assigned to a cluster - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { t_pack_molecule* best = nullptr; - t_pack_molecule* 
molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { best = molecule; @@ -2798,17 +1722,11 @@ t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vect return nullptr; } -/* get gain of packing molecule into current cluster - * gain is equal to: - * total_block_gain - * + molecule_base_gain*some_factor - * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor - */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { float gain; int i; int num_introduced_inputs_of_indirectly_related_block; - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); gain = 0; float attraction_group_penalty = 0.1; @@ -2861,388 +1779,32 @@ float get_molecule_gain(t_pack_molecule* molecule, std::map& return gain; } -/* Determine if speculatively packed cur_pb is pin feasible - * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the - * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. 
- */ -void try_update_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - // run recursively till a leaf (primitive) pb block is reached - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } else { - // find if this child (primitive) pb block has an atom mapped to it, - // if yes compute and mark lookahead pins used for that pb block - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (pb_type->blif_model != nullptr && blk_id) { - compute_and_mark_lookahead_pins_used(blk_id); - } - } -} - -/* Resets nets used at different pin classes for determining pin feasibility */ -void reset_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - if (cur_pb->pb_stats == nullptr) { - return; /* No pins used, no need to continue */ - } - - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); - } - - for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); - } - - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/* Determine if pins of speculatively packed pb are legal */ -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id) { - 
auto& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(cur_pb != nullptr); - - /* Walk through inputs, outputs, and clocks marking pins off of the same class */ - for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - - const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id); - } -} - -/** - * Given a pin and its assigned net, mark all pin classes that are affected. - * Check if connecting this pin to it's driver pin or to all sink pins will - * require leaving a pb_block starting from the parent pb_block of the - * primitive till the root block (depth = 0). If leaving a pb_block is - * required add this net to the pin class (to increment the number of used - * pins from this class) that should be used to leave the pb_block. - */ -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, const AtomNetId net_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // starting from the parent pb of the input primitive go up in the hierarchy till the root block - for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { - const auto depth = cur_pb->pb_graph_node->pb_type->depth; - const auto pin_class = pb_graph_pin->parent_pin_class[depth]; - VTR_ASSERT(pin_class != OPEN); - - const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); - - // if this primitive pin is an input pin - if (pb_graph_pin->port->type == IN_PORT) { - /* find location of net driver if exist in clb, NULL otherwise */ - // find the driver of the input net connected to the pin being studied - const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - // find the id of the atom occupying the input primitive_pb - const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); - // find the pb block 
occupied by the driving atom - const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); - // pb_graph_pin driving net_id in the driver pb block - t_pb_graph_pin* output_pb_graph_pin = nullptr; - // if the driver block is in the same clb as the input primitive block - if (atom_ctx.lookup.atom_clb(driver_blk_id) == atom_ctx.lookup.atom_clb(prim_blk_id)) { - // get pb_graph_pin driving the given net - output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); - } - - bool is_reachable = false; - - // if the driver pin is within the cluster - if (output_pb_graph_pin) { - // find if the driver pin can reach the input pin of the primitive or not - const t_pb* check_pb = driver_pb; - while (check_pb && check_pb != cur_pb) { - check_pb = check_pb->parent_pb; - } - if (check_pb) { - for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { - if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { - is_reachable = true; - break; - } - } - } - } - - // Must use an input pin to connect the driver to the input pin of the given primitive, either the - // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin - if (!is_reachable) { - // add net to lookahead_input_pins_used if not already added - auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); - if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); - } - } - } else { - VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); - /* - * Determine if this net (which is driven from within this cluster) leaves this cluster - * (and hence uses an output pin). 
- */ - - bool net_exits_cluster = true; - int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); - - if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { - //It is possible the net is completely absorbed in the cluster, - //since this pin could (potentially) drive all the net's sinks - - /* Important: This runtime penalty looks a lot scarier than it really is. - * For high fan-out nets, I at most look at the number of pins within the - * cluster which limits runtime. - * - * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! - * - * Key Observation: - * For LUT-based designs it is impossible for the average fanout to exceed - * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, - * if the average fanout is greater than the number of LUT inputs, where do - * the extra connections go? Therefore, average fanout must be capped to a - * small constant where the constant is equal to the number of LUT inputs). - * The real danger to runtime is when the number of sinks of a net gets doubled - */ - - //Check if all the net sinks are, in fact, inside this cluster - bool all_sinks_in_cur_cluster = true; - ClusterBlockId driver_clb = atom_ctx.lookup.atom_clb(driver_blk_id); - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(sink_blk_id) != driver_clb) { - all_sinks_in_cur_cluster = false; - break; - } - } - - if (all_sinks_in_cur_cluster) { - //All the sinks are part of this cluster, so the net may be fully absorbed. - // - //Verify this, by counting the number of net sinks reachable from the driver pin. 
- //If the count equals the number of net sinks then the net is fully absorbed and - //the net does not exit the cluster - /* TODO: I should cache the absorbed outputs, once net is absorbed, - * net is forever absorbed, no point in rechecking every time */ - if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { - //All the sinks are reachable inside the cluster - net_exits_cluster = false; - } - } - } - - if (net_exits_cluster) { - /* This output must exit this cluster */ - cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); - } - } - } -} - -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { - size_t num_reachable_sinks = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - - //Record the sink pb graph pins we are looking for - std::unordered_set sink_pb_gpins; - for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { - const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - VTR_ASSERT(sink_pb_gpin); - - sink_pb_gpins.insert(sink_pb_gpin); - } - - //Count how many sink pins are reachable - for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { - const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; - - if (sink_pb_gpins.count(reachable_pb_gpin)) { - ++num_reachable_sinks; - if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { - return true; - } - } - } - - return false; -} - -/** - * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb - */ -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; - int output_port = 0; - // find the port of the pin driving the net as well as the port model - auto driver_port_id = 
atom_ctx.nlist.pin_port(driver_pin_id); - auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); - // find the port id of the port containing the driving pin in the driver_pb_type - for (int i = 0; i < driver_pb_type->num_ports; i++) { - auto& prim_port = driver_pb_type->ports[i]; - if (prim_port.type == OUT_PORT) { - if (prim_port.model_port == driver_model_port) { - // get the output pb_graph_pin driving this input net - return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); - } - output_port++; - } - } - // the pin should be found - VTR_ASSERT(false); - return nullptr; -} - -/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; - - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster inputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. 
- class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { - return false; - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster outputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. 
- class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { - return false; - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) - return false; - } - } - } - } - } - - return true; -} - -/* Speculation successful, commit input/output pins used */ -void commit_lookahead_pins_used(t_pb* cur_pb) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); - cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); - cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - 
commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/** - * Score unclustered atoms that are two hops away from current cluster - * For example, consider a cluster that has a FF feeding an adder in another - * cluster. Since this FF is feeding an adder that is packed in another cluster - * this function should find other FFs that are feeding other inputs of this adder - * since they are two hops away from the FF packed in this cluster - */ -void load_transitive_fanout_candidates(ClusterBlockId clb_index, +void load_transitive_fanout_candidates(LegalizationClusterId legalization_cluster_id, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold) { - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; // iterate over all the nets that have pins in this cluster for (const auto net_id : pb_stats->marked_nets) { // only consider small nets to constrain runtime - if (int(atom_ctx.nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { + if (int(atom_nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { // iterate over all the pins of the net - for (const auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - AtomBlockId atom_blk_id = atom_ctx.nlist.pin_block(pin_id); + for (const auto pin_id : atom_nlist.net_pins(net_id)) { + AtomBlockId atom_blk_id = atom_nlist.pin_block(pin_id); // get the transitive cluster - ClusterBlockId tclb = atom_ctx.lookup.atom_clb(atom_blk_id); + LegalizationClusterId tclb = cluster_legalizer.get_atom_cluster(atom_blk_id); // if the block connected to this pin is packed in another cluster - if (tclb != clb_index && tclb != ClusterBlockId::INVALID()) { + if (tclb != legalization_cluster_id && tclb != LegalizationClusterId::INVALID()) { // explore transitive nets from already packed cluster for (AtomNetId tnet : 
clb_inter_blk_nets[tclb]) { // iterate over all the pins of the net - for (AtomPinId tpin : atom_ctx.nlist.net_pins(tnet)) { - auto blk_id = atom_ctx.nlist.pin_block(tpin); + for (AtomPinId tpin : atom_nlist.net_pins(tnet)) { + auto blk_id = atom_nlist.pin_block(tpin); // This transitive atom is not packed, score and add - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (!cluster_legalizer.is_atom_clustered(blk_id)) { auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; if (pb_stats->gain.count(blk_id) == 0) { @@ -3250,7 +1812,7 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, } else { pb_stats->gain[blk_id] += 0.001; } - t_pack_molecule* molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); if (molecule->valid) { transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); } @@ -3265,9 +1827,8 @@ void load_transitive_fanout_candidates(ClusterBlockId clb_index, std::map> identify_primitive_candidate_block_types() { std::map> model_candidates; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_nlist = atom_ctx.nlist; - auto& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + const DeviceContext& device_ctx = g_vpr_ctx.device(); std::set unique_models; // Find all logic models used in the netlist @@ -3295,7 +1856,7 @@ std::map> identify_primiti void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality) { FILE* fp = vtr::fopen(fname, "w"); - auto& atom_ctx = g_vpr_ctx.atom(); + const AtomContext& atom_ctx = g_vpr_ctx.atom(); //For prett formatting determine the maximum name length int max_name_len = strlen("atom_block_name"); @@ -3324,99 +1885,6 @@ void print_seed_gains(const char* fname, const std::vector& seed_at fclose(fp); } -/** - * This function takes a chain molecule, and the 
pb_graph_node that is chosen - * for packing the molecule's root block. Using the given root_primitive, this - * function will identify which chain id this molecule is being mapped to and - * will update the chain id value inside the chain info data structure of this - * molecule - */ -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { - VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); - - auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; - - // long chains should only be placed at the beginning of the chain - // Since for long chains the molecule size is already equal to the - // total number of adders in the cluster. Therefore, it should - // always be placed at the very first adder in this cluster. - for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { - if (chain_root_pins[chainId][0]->parent_node == root_primitive) { - chain_molecule->chain_info->chain_id = chainId; - chain_molecule->chain_info->first_packed_molecule = chain_molecule; - return; - } - } - - VTR_ASSERT(false); -} - -/** - * This function takes the root block of a chain molecule and a proposed - * placement primitive for this block. The function then checks if this - * chain root block has a placement constraint (such as being driven from - * outside the cluster) and returns the status of the placement accordingly. 
- */ -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id) { - enum e_block_pack_status block_pack_status = e_block_pack_status::BLK_PASSED; - auto& atom_ctx = g_vpr_ctx.atom(); - - bool is_long_chain = molecule->chain_info->is_long_chain; - - const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; - - t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; - AtomNetId chain_net_id; - auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); - - if (port_id) { - chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); - } - - // if this block is part of a long chain or it is driven by a cluster - // input pin we need to check the placement legality of this block - // Depending on the logic synthesis even small chains that can fit within one - // cluster might need to start at the top of the cluster as their input can be - // driven by a global gnd or vdd. Therefore even if this is not a long chain - // but its input pin is driven by a net, the placement legality is checked. - if (is_long_chain || chain_net_id) { - auto chain_id = molecule->chain_info->chain_id; - // if this chain has a chain id assigned to it (implies is_long_chain too) - if (chain_id != -1) { - // the chosen primitive should be a valid starting point for the chain - // long chains should only be placed at the top of the chain tieOff = 0 - if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - } - // the chain doesn't have an assigned chain_id yet - } else { - block_pack_status = e_block_pack_status::BLK_FAILED_FEASIBLE; - for (const auto& chain : chain_root_pins) { - for (auto tieOff : chain) { - // check if this chosen primitive is one of the possible - // starting points for this chain. 
- if (pb_graph_node == tieOff->parent_node) { - // this location matches with the one of the dedicated chain - // input from outside logic block, therefore it is feasible - block_pack_status = e_block_pack_status::BLK_PASSED; - break; - } - // long chains should only be placed at the top of the chain tieOff = 0 - if (is_long_chain) break; - } - } - } - } - - return block_pack_status; -} - -/** - * This function update the pb_type_count data structure by incrementing - * the number of used pb_types in the given packed cluster t_pb - */ size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { size_t max_depth = depth; @@ -3460,10 +1928,6 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_ } } -/** - * This function identifies the logic block type which is - * defined by the block type which has a lut primitive - */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { std::string lut_name = ".names"; @@ -3476,12 +1940,6 @@ t_logical_block_type_ptr identify_logic_block_type(std::mapLE) that has more than one instance within the cluster. 
- */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { // if there is no CLB-like cluster, then there is no LE pb_block if (!logic_block_type) @@ -3506,9 +1964,6 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { return nullptr; } -/** - * This function updates the le_count data structure from the given packed cluster - */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { // if this cluster doesn't contain LEs or there // are no les in this architecture, ignore it @@ -3548,10 +2003,6 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_ } } -/** - * This function returns true if the given physical block has - * a primitive matching the given blif model and is used - */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) { auto pb_graph_node = pb->pb_graph_node; auto pb_type = pb_graph_node->pb_type; @@ -3579,9 +2030,6 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name) return false; } -/** - * Print the LE count data strurture - */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); @@ -3590,11 +2038,6 @@ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); } -/** - * Given a pointer to a pb in a cluster, this routine returns - * a pointer to the top-level pb of the given pb. - * This is needed when updating the gain for a cluster. 
- */ t_pb* get_top_level_pb(t_pb* pb) { t_pb* top_level_pb = pb; @@ -3608,20 +2051,17 @@ t_pb* get_top_level_pb(t_pb* pb) { return top_level_pb; } -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size()); - - for (auto atom_blk_id : atom_ctx.nlist.blocks()) { +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist) { + // Resize the atoms lookup to the number of clusters. + atoms_lookup.resize(clb_nlist.blocks().size()); + for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) { + // Get the CLB that this atom is packed into. ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(atom_blk_id); - - /* if this data structure is being built alongside the clustered netlist */ - /* e.g. when ingesting and legalizing a flat placement solution, some atoms */ - /* may not yet be mapped to a valid clb_index */ - if (clb_index != ClusterBlockId::INVALID()) { - atoms_lookup[clb_index].insert(atom_blk_id); - } + // Every atom block should be in a cluster. + VTR_ASSERT_SAFE(clb_index.is_valid()); + // Insert this clb into the lookup's set. 
+ atoms_lookup[clb_index].insert(atom_blk_id); } } diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 4f190645ff0..d25a3b1ab44 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -2,15 +2,18 @@ #define CLUSTER_UTIL_H #include +#include "cluster_legalizer.h" #include "pack_types.h" #include "vtr_vector.h" class AtomNetId; class ClusterBlockId; +class ClusteredNetlist; class PreClusterDelayCalculator; class Prepacker; class SetupTimingInfo; class t_pack_molecule; +struct AtomContext; /** * @file @@ -20,9 +23,6 @@ class t_pack_molecule; constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10; /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40; /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ -//Constant allowing all cluster pins to be used -const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); - enum e_gain_update { GAIN, NO_GAIN @@ -45,12 +45,6 @@ enum e_net_relation_to_clustered_block { OUTPUT }; -enum e_detailed_routing_stages { - E_DETAILED_ROUTE_AT_END_ONLY = 0, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - E_DETAILED_ROUTE_INVALID -}; - /* Linked list structure. Stores one integer (iblk). 
*/ struct t_molecule_link { t_pack_molecule* moleculeptr; @@ -79,7 +73,6 @@ struct t_cluster_progress_stats { /* Useful data structures for creating or modifying clusters */ struct t_clustering_data { - vtr::vector*> intra_lb_routing; int* hill_climbing_inputs_avail; /* Keeps a linked list of the unclustered blocks to speed up looking for * @@ -106,9 +99,9 @@ struct t_clustering_data { /* Clustering helper functions */ /***********************************/ -void check_clustering(); - -//calculate the initial timing at the start of packing stage +/* + * @brief Calculate the initial timing at the start of packing stage. + */ void calc_init_packing_timing(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const Prepacker& prepacker, @@ -116,226 +109,171 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, std::shared_ptr& timing_info, vtr::vector& atom_criticality); -//free the clustering data structures +/* + * @brief Free the clustering data structures. + */ void free_clustering_data(const t_packer_opts& packer_opts, t_clustering_data& clustering_data); -//check clustering legality and output it -void check_and_output_clustering(const t_packer_opts& packer_opts, +/* + * @brief Check clustering legality and output it. + */ +void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, + const t_packer_opts& packer_opts, const std::unordered_set& is_clock, - const t_arch* arch, - const int& num_clb, - const vtr::vector*>& intra_lb_routing); - -void get_max_cluster_size_and_pb_depth(int& max_cluster_size, - int& max_pb_depth); - -bool check_cluster_legality(const int& verbosity, - const int& detailed_routing_stage, - t_lb_router_data* router_data); + const t_arch* arch); +/* + * @brief Determine if atom block is in pb. + */ bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); +/* + * @brief Add blk to list of feasible blocks sorted according to gain. 
+ */ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, int max_queue_size, AttractionInfo& attraction_groups); +/* + * @brief Remove blk from list of feasible blocks sorted according to gain. + * + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it. + */ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, t_pb* pb); - +/* + * @brief Allocates and inits the data structures used for clustering. + * + * This method initializes the list of molecules to pack, the clustering data, + * and the net info. + */ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, const Prepacker& prepacker, t_clustering_data& clustering_data, std::unordered_map& net_output_feeds_driving_block_input, int& unclustered_list_head_size, int num_molecules); -void free_pb_stats_recursive(t_pb* pb); - -void try_update_lookahead_pins_used(t_pb* cur_pb); - -void reset_lookahead_pins_used(t_pb* cur_pb); - -void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id); - -void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, - const t_pb* primitive_pb, - const AtomNetId net_id); - -void commit_lookahead_pins_used(t_pb* cur_pb); - -bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util); - -bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb); - -bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_memory_blk); - +/* + * @brief This routine returns an atom block which has not been clustered, has + * no connection to the current cluster, satisfies the cluster clock + * constraints, is a valid subblock inside the cluster, does not exceed + * the cluster subblock units available, and has ext_inps external 
inputs. + * Remove_flag controls whether or not blocks that have already been + * clustered are removed from the unclustered_list data structures. + * NB: to get a atom block regardless of clock constraints just set + * clocks_avail > 0. + */ t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, const enum e_removal_policy remove_flag, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head); +/* @brief This routine is used to find new blocks for clustering when there are + * no feasible blocks with any attraction to the current cluster (i.e. + * it finds blocks which are unconnected from the current cluster). It + * returns the atom block with the largest number of used inputs that + * satisfies the clocking and number of inputs constraints. If no + * suitable atom block is found, the routine returns nullptr. + */ t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size); +/* + * @brief Print the header for the clustering progress table. + */ void print_pack_status_header(); +/* + * @brief Incrementally print progress updates during clustering. 
+ */ void print_pack_status(int num_clb, int tot_num_molecules, int num_molecules_processed, int& mols_since_last_print, int device_width, int device_height, - AttractionInfo& attraction_groups); - -void rebuild_attraction_groups(AttractionInfo& attraction_groups); - -void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); - -e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - int max_models, - int max_cluster_size, - ClusterBlockId clb_index, - int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, - int force_site = -1); - -void try_fill_cluster(const t_packer_opts& packer_opts, + AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +/* + * @brief Periodically rebuild the attraction groups to reflect which atoms in + * them are still available for new clusters (i.e. remove the atoms that + * have already been packed from the attraction group). + */ +void rebuild_attraction_groups(AttractionInfo& attraction_groups, + const ClusterLegalizer& cluster_legalizer); + +/* + * @brief Try to pack next_molecule into the given cluster. If this succeeds + * prepares the next_molecule with a new value to pack next iteration. + * + * This method will print the pack status and update the cluster stats. 
+ */ +void try_fill_cluster(ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, int& num_same_molecules, - t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, + const LegalizationClusterId legalization_cluster_id, AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, + vtr::vector>& clb_inter_blk_nets, bool allow_unrelated_clustering, const int& high_fanout_threshold, const std::unordered_set& is_clock, const std::unordered_set& is_global, const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id, e_block_pack_status& block_pack_status, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::unordered_map& net_output_feeds_driving_block_input, std::map>& primitive_candidate_block_types); -t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data); - void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, + const LegalizationClusterId clb_index, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count, - vtr::vector>& clb_inter_blk_nets); - -void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex); - -enum 
e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size); - - -/** - * @brief Checks whether an atom block can be added to a clustered block - * without violating floorplanning constraints. It also updates the - * clustered block's floorplanning region by taking the intersection of - * its current region and the floorplanning region of the given atom block. - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. - * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_pr The floorplanning regions of the clustered block. This function may - * update the given region. - * @param cluster_pr_needs_update Indicates whether the floorplanning region of the clustered block - * have updated. - * @return True if adding the given atom block to the clustered block does not violated any - * floorplanning constraints. - */ -bool atom_cluster_floorplanning_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update); -/** - * @brief Checks if an atom block can be added to a clustered block without - * violating NoC group constraints. For passing this check, either both clustered - * and atom blocks must belong to the same NoC group, or at least one of them should - * not belong to any NoC group. 
If the atom block is associated with a NoC group while - * the clustered block does not belong to any NoC groups, the NoC group ID of the atom block - * is assigned to the clustered block when the atom is added to it. - * block - * - * @param blk_id A unique ID for the candidate atom block to be added to the growing cluster. - * @param clb_index A unique ID for the clustered block that the atom block wants to be added to. - * @param verbosity Controls the detail level of log information printed by this function. - * @param temp_cluster_noc_grp_id The NoC group ID of the clustered block. This function may update - * this ID. - * @return True if adding the atom block the cluster does not violate NoC group constraints. - */ -bool atom_cluster_noc_group_check(AtomBlockId blk_id, - ClusterBlockId clb_index, - int verbosity, - NocGroupId& temp_cluster_noc_grp_id); - -void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data); + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets); -void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block); +void update_connection_gain_values(const AtomNetId net_id, + const AtomBlockId clustered_blk_id, + t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block); void update_timing_gain_values(const AtomNetId net_id, t_pb* cur_pb, + const ClusterLegalizer& cluster_legalizer, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain + * when an atom block is added to a cluster. 
The sharinggain is the + * number of inputs that an atom block shares with blocks that are already + * in the cluster. Hillgain is the reduction in number of pins-required + * by adding an atom block to the cluster. The timinggain is the + * criticality of the most critical atom net between this atom block and + * an atom block in the cluster. + */ void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, bool timing_driven, bool connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, @@ -344,10 +282,22 @@ void mark_and_update_partial_gain(const AtomNetId net_id, const int high_fanout_net_threshold, std::unordered_map& net_output_feeds_driving_block_input); +/* + * @brief Updates the total gain array to reflect the desired tradeoff between + * input sharing (sharinggain) and path_length minimization (timinggain) + * input each time a new molecule is added to the cluster. + */ void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); +/* + * @brief Routine that is called each time a new molecule is added to the cluster. + * + * Makes calls to update cluster stats such as the gain map for atoms, used pins, + * and clock structures, in order to reflect the new content of the cluster. + * Also keeps track of which attraction group the cluster belongs to.
+ */ void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, + const ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const std::unordered_set& is_global, const bool global_clocks, @@ -360,65 +310,112 @@ void update_cluster_stats(const t_pack_molecule* molecule, AttractionInfo& attraction_groups, std::unordered_map& net_output_feeds_driving_block_input); -void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - ClusterBlockId clb_index, +/* + * @brief Given a starting seed block, start_new_cluster determines the next + * cluster type to use. + * + * It expands the FPGA if it cannot find a legal cluster for the atom block + */ +void start_new_cluster(ClusterLegalizer& cluster_legalizer, + LegalizationClusterId& legalization_cluster_id, t_pack_molecule* molecule, std::map& num_used_type_instances, const float target_device_utilization, - const int num_models, - const int max_cluster_size, const t_arch* arch, const std::string& device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, const std::map>& primitive_candidate_block_types, int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_noc_grp_id); + bool balance_block_type_utilization); +/* + * @brief Get candidate molecule to pack into currently open cluster + * + * Molecule selection priority: + * 1. Find unpacked molecules based on criticality and strong connectedness + * (connected by low fanout nets) with current cluster. + * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) + * with current cluster. + * 3. Find unpacked molecules based on weak connectedness (connected by high + * fanout nets) with current cluster. + * 4. 
Find unpacked molecules based on attraction group of the current cluster + * (if the cluster has an attraction group). + */ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, AttractionInfo& attraction_groups, const enum e_gain_type gain_mode, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, const int feasible_block_array_size, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules with strong connectedness to the current cluster to the + * list of feasible blocks. + */ void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief Add molecules based on weak connectedness (connected by high fanout + * nets) with current cluster. + */ void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, const int feasible_block_array_size, AttractionInfo& attraction_groups); +/* + * @brief If the current cluster being packed has an attraction group associated + * with it (i.e. there are atoms in it that belong to an attraction group), + * this routine adds molecules from the associated attraction group to + * the list of feasible blocks for the cluster. + * + * Attraction groups can be very large, so we only add some randomly selected + * molecules for efficiency if the number of atoms in the group is greater than + * 500. 
Therefore, the molecules added to the candidates will vary each time you + * call this function. + */ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups, const int feasible_block_array_size, - ClusterBlockId clb_index, + LegalizationClusterId clb_index, std::map>& primitive_candidate_block_types); +/* + * @brief Add molecules based on transitive connections (eg. 2 hops away) with + * current cluster. + */ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + const LegalizationClusterId cluster_index, int transitive_fanout_threshold, const int feasible_block_array_size, AttractionInfo& attraction_groups); -bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); +/* + * @brief Check whether a free primitive exists for each atom block in the + * molecule. 
+ */ +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const ClusterLegalizer& cluster_legalizer); t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, AttractionInfo& attraction_groups, @@ -428,61 +425,112 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, const int feasible_block_array_size, int* num_unrelated_clustering_attempts, t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, + LegalizationClusterId cluster_index, int verbosity, t_molecule_link* unclustered_list_head, const int& unclustered_list_head_size, std::map>& primitive_candidate_block_types); +/* + * @brief Calculates molecule statistics for a single molecule. + */ t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist); std::vector initialize_seed_atoms(const e_cluster_seed seed_type, const t_molecule_stats& max_molecule_stats, + const Prepacker& prepacker, const vtr::vector& atom_criticality); -t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, const std::vector& seed_atoms); +t_pack_molecule* get_highest_gain_seed_molecule(int& seed_index, + const std::vector& seed_atoms, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer); +/* + * @brief Get gain of packing molecule into current cluster. 
+ * + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + */ float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); - void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); -void load_transitive_fanout_candidates(ClusterBlockId cluster_index, +/** + * @brief Score unclustered atoms that are two hops away from current cluster + * + * For example, consider a cluster that has a FF feeding an adder in another + * cluster. Since this FF is feeding an adder that is packed in another cluster + * this function should find other FFs that are feeding other inputs of this adder + * since they are two hops away from the FF packed in this cluster + */ +void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + vtr::vector>& clb_inter_blk_nets, int transitive_fanout_threshold); std::map> identify_primitive_candidate_block_types(); -void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive); - -enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id); - -t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id); - +/** + * @brief This function updates the pb_type_count data structure by incrementing + * the number of used pb_types in the given packed cluster t_pb + */ size_t update_pb_type_count(const t_pb* pb,
std::map& pb_type_count, size_t depth); +/* + * @brief This function updates the le_count data structure from the given + * packed cluster. + */ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); +/* + * @brief This function identifies the logic block type which is defined by the + * block type which has a lut primitive. + */ t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); +/* + * @brief This function returns the pb_type that is similar to Logic Element (LE) + * in an FPGA. + * + * The LE is defined as a physical block that contains a LUT primitive and + * is found by searching a cluster type to find the first pb_type (from the top + * of the hierarchy clb->LE) that has more than one instance within the cluster. + */ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type); +/* + * @brief This function returns true if the given physical block has a primitive + * matching the given blif model and is used. + */ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name); +/* + * @brief Print the LE count data structure. + */ void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); +/* + * @brief Given a pointer to a pb in a cluster, this routine returns a pointer + * to the top-level pb of the given pb. + * + * This is needed when updating the gain for a cluster. + */ t_pb* get_top_level_pb(t_pb* pb); -bool cleanup_pb(t_pb* pb); - -void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); - -void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); +/* + * @brief Load the mapping between clusters and their atoms.
+ */ +void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, + const AtomContext& atom_ctx, + const ClusteredNetlist& clb_nlist); #endif diff --git a/vpr/src/pack/constraints_report.cpp b/vpr/src/pack/constraints_report.cpp index 5c53744fd5e..6b671331c74 100644 --- a/vpr/src/pack/constraints_report.cpp +++ b/vpr/src/pack/constraints_report.cpp @@ -1,9 +1,11 @@ #include "constraints_report.h" +#include "cluster_legalizer.h" +#include "globals.h" +#include "grid_tile_lookup.h" -bool floorplan_constraints_regions_overfull() { +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer) { GridTileLookup grid_tiles; - auto& cluster_ctx = g_vpr_ctx.clustering(); auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& device_ctx = g_vpr_ctx.device(); @@ -12,15 +14,13 @@ bool floorplan_constraints_regions_overfull() { // keep record of how many blocks of each type are assigned to each PartitionRegion std::unordered_map> pr_count_info; - for (const ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { - if (!is_cluster_constrained(blk_id)) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + const PartitionRegion& pr = cluster_legalizer.get_cluster_pr(cluster_id); + if (pr.empty()) continue; - } - t_logical_block_type_ptr bt = cluster_ctx.clb_nlist.block_type(blk_id); - const PartitionRegion& pr = floorplanning_ctx.cluster_constraints[blk_id]; + t_logical_block_type_ptr bt = cluster_legalizer.get_cluster_type(cluster_id); auto got = pr_count_info.find(pr); - if (got == pr_count_info.end()) { std::vector block_type_counts(block_types.size(), 0); block_type_counts[bt->index]++; diff --git a/vpr/src/pack/constraints_report.h b/vpr/src/pack/constraints_report.h index 46af3fa83db..c10d1183238 100644 --- a/vpr/src/pack/constraints_report.h +++ b/vpr/src/pack/constraints_report.h @@ -5,9 +5,7 @@ #ifndef VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ #define VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ -#include "globals.h" -#include 
"grid_tile_lookup.h" -#include "place_constraints.h" +class ClusterLegalizer; /** * @brief Check if any constraint partition regions are overfull, @@ -27,6 +25,6 @@ * * @return True if there is at least one overfull partition. */ -bool floorplan_constraints_regions_overfull(); +bool floorplan_constraints_regions_overfull(const ClusterLegalizer& cluster_legalizer); #endif /* VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ */ diff --git a/vpr/src/pack/noc_aware_cluster_util.cpp b/vpr/src/pack/noc_aware_cluster_util.cpp index 51319175c12..87f981605de 100644 --- a/vpr/src/pack/noc_aware_cluster_util.cpp +++ b/vpr/src/pack/noc_aware_cluster_util.cpp @@ -1,12 +1,12 @@ #include "noc_aware_cluster_util.h" +#include "atom_netlist.h" #include "globals.h" +#include "vpr_types.h" #include -std::vector find_noc_router_atoms() { - const auto& atom_ctx = g_vpr_ctx.atom(); - +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist) { // NoC router atoms are expected to have a specific blif model const std::string noc_router_blif_model_name = "noc_router_adapter_block"; @@ -14,8 +14,8 @@ std::vector find_noc_router_atoms() { std::vector noc_router_atoms; // iterate over all atoms and find those whose blif model matches - for (auto atom_id : atom_ctx.nlist.blocks()) { - const t_model* model = atom_ctx.nlist.block_model(atom_id); + for (auto atom_id : atom_netlist.blocks()) { + const t_model* model = atom_netlist.block_model(atom_id); if (noc_router_blif_model_name == model->name) { noc_router_atoms.push_back(atom_id); } @@ -24,10 +24,10 @@ std::vector find_noc_router_atoms() { return noc_router_atoms; } -void update_noc_reachability_partitions(const std::vector& noc_atoms) { - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& cl_helper_ctx = g_vpr_ctx.mutable_cl_helper(); - const auto& high_fanout_thresholds = g_vpr_ctx.cl_helper().high_fanout_thresholds; +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const 
t_pack_high_fanout_thresholds& high_fanout_thresholds, + vtr::vector& atom_noc_grp_id) { const auto& grid = g_vpr_ctx.device().grid; t_logical_block_type_ptr logic_block_type = infer_logic_block_type(grid); @@ -35,11 +35,11 @@ void update_noc_reachability_partitions(const std::vector& noc_atom const size_t high_fanout_threshold = high_fanout_thresholds.get_threshold(logical_block_name); // get the total number of atoms - const size_t n_atoms = atom_ctx.nlist.blocks().size(); + const size_t n_atoms = atom_netlist.blocks().size(); vtr::vector atom_visited(n_atoms, false); - cl_helper_ctx.atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); + atom_noc_grp_id.resize(n_atoms, NocGroupId::INVALID()); int noc_grp_id_cnt = 0; @@ -68,24 +68,24 @@ void update_noc_reachability_partitions(const std::vector& noc_atom AtomBlockId current_atom = q.front(); q.pop(); - cl_helper_ctx.atom_noc_grp_id[current_atom] = noc_grp_id; + atom_noc_grp_id[current_atom] = noc_grp_id; - for(auto pin : atom_ctx.nlist.block_pins(current_atom)) { - AtomNetId net_id = atom_ctx.nlist.pin_net(pin); - size_t net_fanout = atom_ctx.nlist.net_sinks(net_id).size(); + for(auto pin : atom_netlist.block_pins(current_atom)) { + AtomNetId net_id = atom_netlist.pin_net(pin); + size_t net_fanout = atom_netlist.net_sinks(net_id).size(); if (net_fanout >= high_fanout_threshold) { continue; } - AtomBlockId driver_atom_id = atom_ctx.nlist.net_driver_block(net_id); + AtomBlockId driver_atom_id = atom_netlist.net_driver_block(net_id); if (!atom_visited[driver_atom_id]) { q.push(driver_atom_id); atom_visited[driver_atom_id] = true; } - for (auto sink_pin : atom_ctx.nlist.net_sinks(net_id)) { - AtomBlockId sink_atom_id = atom_ctx.nlist.pin_block(sink_pin); + for (auto sink_pin : atom_netlist.net_sinks(net_id)) { + AtomBlockId sink_atom_id = atom_netlist.pin_block(sink_pin); if (!atom_visited[sink_atom_id]) { q.push(sink_atom_id); atom_visited[sink_atom_id] = true; @@ -96,4 +96,4 @@ void 
update_noc_reachability_partitions(const std::vector& noc_atom } } -} \ No newline at end of file +} diff --git a/vpr/src/pack/noc_aware_cluster_util.h b/vpr/src/pack/noc_aware_cluster_util.h index abeb8d8ba95..6f930a21944 100644 --- a/vpr/src/pack/noc_aware_cluster_util.h +++ b/vpr/src/pack/noc_aware_cluster_util.h @@ -17,8 +17,12 @@ */ #include +#include "noc_data_types.h" +#include "vtr_vector.h" -#include "vpr_types.h" +class AtomNetlist; +class AtomBlockId; +class t_pack_high_fanout_thresholds; /** * @brief Iterates over all atom blocks and check whether @@ -26,7 +30,7 @@ * * @return The atom block IDs of the NoC router blocks in the netlist. */ -std::vector find_noc_router_atoms(); +std::vector find_noc_router_atoms(const AtomNetlist& atom_netlist); /** @@ -37,6 +41,9 @@ std::vector find_noc_router_atoms(); * * @param noc_atoms The atom block IDs of the NoC router blocks in the netlist. */ -void update_noc_reachability_partitions(const std::vector& noc_atoms); +void update_noc_reachability_partitions(const std::vector& noc_atoms, + const AtomNetlist& atom_netlist, + const t_pack_high_fanout_thresholds& high_fanout_threshold, + vtr::vector& atom_noc_grp_id); #endif diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index cee87ad51a1..c659837c5fb 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -8,10 +8,13 @@ #include #include +#include "cluster_legalizer.h" +#include "clustered_netlist.h" +#include "physical_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_digest.h" -#include "vtr_memory.h" #include "vpr_types.h" #include "vpr_error.h" @@ -20,11 +23,8 @@ #include "globals.h" #include "atom_netlist.h" -#include "pack_types.h" -#include "cluster_router.h" #include "pb_type_graph.h" #include "output_clustering.h" -#include "read_xml_arch_file.h" #include "vpr_utils.h" #include "pack.h" @@ -36,49 +36,84 @@ static void 
print_clustering_stats(char* block_name, int num_block_type, float n /**************** Subroutine definitions ************************************/ -/* Prints out one cluster (clb). Both the external pins and the * - * internal connections are printed out. */ -static void print_stats() { - int ipin; - unsigned int itype; - int total_nets_absorbed; - std::unordered_map nets_absorbed; - - int *num_clb_types, *num_clb_inputs_used, *num_clb_outputs_used; - - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - num_clb_types = num_clb_inputs_used = num_clb_outputs_used = nullptr; - - num_clb_types = new int[device_ctx.logical_block_types.size()]; - num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; - num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; - - for (int i = 0; i < (int)device_ctx.logical_block_types.size(); i++) { - num_clb_types[i] = 0; - num_clb_inputs_used[i] = 0; - num_clb_outputs_used[i] = 0; +static void count_clb_inputs_and_outputs_from_pb_route(const t_pb* pb, + t_logical_block_type_ptr logical_block, + int ipin, + e_pin_type pin_type, + std::unordered_map& nets_absorbed, + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + VTR_ASSERT_DEBUG(!pb->pb_route.empty()); + int pb_graph_pin_id = get_pb_graph_node_pin_from_pb_graph_node(pb->pb_graph_node, ipin)->pin_count_in_cluster; + + if (pb->pb_route.count(pb_graph_pin_id)) { + //Pin used + AtomNetId atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; + if (atom_net_id) { + nets_absorbed[atom_net_id] = false; + if (pin_type == RECEIVER) { + num_clb_inputs_used[logical_block->index]++; + } else if (pin_type == DRIVER) { + num_clb_outputs_used[logical_block->index]++; + } + } } +} - for (auto net_id : atom_ctx.nlist.nets()) { - nets_absorbed[net_id] = true; +static void count_stats_from_legalizer(const ClusterLegalizer& cluster_legalizer, + std::unordered_map& nets_absorbed, + int 
num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + t_logical_block_type_ptr logical_block = cluster_legalizer.get_cluster_type(cluster_id); + t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { + int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + const t_pb* pb = cluster_legalizer.get_cluster_pb(cluster_id); + if (pb->pb_route.empty()) + continue; + count_clb_inputs_and_outputs_from_pb_route(pb, + logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } + num_clb_types[logical_block->index]++; } - /* Counters used only for statistics purposes. */ +} - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); - auto physical_tile = pick_physical_type(logical_block); - for (ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { +static void count_stats_from_netlist(std::unordered_map& nets_absorbed, + int num_clb_types[], + int num_clb_inputs_used[], + int num_clb_outputs_used[]) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + for (ClusterBlockId blk_id : clb_nlist.blocks()) { + t_logical_block_type_ptr logical_block = clb_nlist.block_type(blk_id); + t_physical_tile_type_ptr physical_tile = pick_physical_type(logical_block); + for (int ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); - auto pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); - - if (cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.empty()) { - ClusterNetId clb_net_id = 
cluster_ctx.clb_nlist.block_net(blk_id, ipin); + e_pin_type pin_type = get_pin_type_from_pin_physical_num(physical_tile, physical_pin); + + if (!clb_nlist.block_pb(blk_id)->pb_route.empty()) { + count_clb_inputs_and_outputs_from_pb_route(clb_nlist.block_pb(blk_id), + logical_block, + ipin, + pin_type, + nets_absorbed, + num_clb_inputs_used, + num_clb_outputs_used); + } else { + ClusterNetId clb_net_id = clb_nlist.block_net(blk_id, ipin); if (clb_net_id != ClusterNetId::INVALID()) { - auto net_id = atom_ctx.lookup.atom_net(clb_net_id); + AtomNetId net_id = atom_ctx.lookup.atom_net(clb_net_id); VTR_ASSERT(net_id); nets_absorbed[net_id] = false; @@ -88,30 +123,45 @@ static void print_stats() { num_clb_outputs_used[logical_block->index]++; } } - } else { - int pb_graph_pin_id = get_pb_graph_node_pin_from_block_pin(blk_id, ipin)->pin_count_in_cluster; - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(blk_id); - if (pb->pb_route.count(pb_graph_pin_id)) { - //Pin used - auto atom_net_id = pb->pb_route[pb_graph_pin_id].atom_net_id; - if (atom_net_id) { - nets_absorbed[atom_net_id] = false; - if (pin_type == RECEIVER) { - num_clb_inputs_used[logical_block->index]++; - } else if (pin_type == DRIVER) { - num_clb_outputs_used[logical_block->index]++; - } - } - } } } num_clb_types[logical_block->index]++; } +} + +/* Prints out one cluster (clb). Both the external pins and the * + * internal connections are printed out. 
*/ +static void print_stats(const ClusterLegalizer* cluster_legalizer_ptr, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; + + int* num_clb_types = new int[device_ctx.logical_block_types.size()]; + int* num_clb_inputs_used = new int[device_ctx.logical_block_types.size()]; + int* num_clb_outputs_used = new int[device_ctx.logical_block_types.size()]; + + for (size_t i = 0; i < device_ctx.logical_block_types.size(); i++) { + num_clb_types[i] = 0; + num_clb_inputs_used[i] = 0; + num_clb_outputs_used[i] = 0; + } + + std::unordered_map nets_absorbed; + for (AtomNetId net_id : atom_nlist.nets()) { + nets_absorbed[net_id] = true; + } + + /* Counters used only for statistics purposes. */ + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + count_stats_from_legalizer(*cluster_legalizer_ptr, nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + count_stats_from_netlist(nets_absorbed, num_clb_types, num_clb_inputs_used, num_clb_outputs_used); + } print_clustering_stats_header(); - for (itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { + for (unsigned int itype = 0; itype < device_ctx.logical_block_types.size(); itype++) { if (num_clb_types[itype] == 0) { print_clustering_stats(device_ctx.logical_block_types[itype].name, num_clb_types[itype], 0.0, 0.0); } else { @@ -121,14 +171,14 @@ static void print_stats() { } } - total_nets_absorbed = 0; - for (auto net_id : atom_ctx.nlist.nets()) { + int total_nets_absorbed = 0; + for (AtomNetId net_id : atom_nlist.nets()) { if (nets_absorbed[net_id] == true) { total_nets_absorbed++; } } VTR_LOG("Absorbed logical nets %d out of %d nets, %d nets not absorbed.\n", - total_nets_absorbed, (int)atom_ctx.nlist.nets().size(), (int)atom_ctx.nlist.nets().size() - total_nets_absorbed); + total_nets_absorbed, (int)atom_nlist.nets().size(), 
(int)atom_nlist.nets().size() - total_nets_absorbed); delete[] num_clb_types; delete[] num_clb_inputs_used; delete[] num_clb_outputs_used; @@ -162,12 +212,12 @@ static const char* clustering_xml_net_text(AtomNetId net_id) { /* This routine prints out the atom_ctx.nlist net name (or open). * net_num is the index of the atom_ctx.nlist net to be printed */ + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; if (!net_id) { return "open"; } else { - auto& atom_ctx = g_vpr_ctx.atom(); - return atom_ctx.nlist.net_name(net_id).c_str(); + return atom_nlist.net_name(net_id).c_str(); } } @@ -218,7 +268,7 @@ static std::string clustering_xml_interconnect_text(t_logical_block_type_ptr typ * cannot simply be marked open as that would lose the routing information. Instead, a block must be * output that reflects the routing resources used. This function handles both cases. */ -static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { +static void clustering_xml_open_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb_graph_node* pb_graph_node, int pb_index, bool is_used, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_mode* mode = nullptr; @@ -362,7 +412,7 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } /* outputs a block that is used (i.e. 
has configuration) and all of its child blocks */ -static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { +static void clustering_xml_block(pugi::xml_node& parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { int i, j, k, m; const t_pb_type *pb_type, *child_pb_type; t_pb_graph_node* pb_graph_node; @@ -559,20 +609,42 @@ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_typ } } +static void clustering_xml_blocks_from_legalizer(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, + ClusterLegalizer& cluster_legalizer) { + // Finalize the cluster legalization by ensuring that each cluster pb has + // its pb_route calculated. + cluster_legalizer.finalize(); + for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) { + clustering_xml_block(block_node, + cluster_legalizer.get_cluster_type(cluster_id), + pb_graph_pin_lookup_from_index_by_type, + cluster_legalizer.get_cluster_pb(cluster_id), + size_t(cluster_id), + cluster_legalizer.get_cluster_pb(cluster_id)->pb_route); + } +} + +static void clustering_xml_blocks_from_netlist(pugi::xml_node& block_node, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type) { + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + for (auto blk_id : clb_nlist.blocks()) { + /* TODO: Must do check that total CLB pins match top-level pb pins, perhaps check this earlier? */ + clustering_xml_block(block_node, + clb_nlist.block_type(blk_id), + pb_graph_pin_lookup_from_index_by_type, + clb_nlist.block_pb(blk_id), + size_t(blk_id), + clb_nlist.block_pb(blk_id)->pb_route); + } +} + /* This routine dumps out the output netlist in a format suitable for * * input to vpr. 
This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. */ -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering) { - auto& device_ctx = g_vpr_ctx.device(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - if (!intra_lb_routing.empty()) { - VTR_ASSERT(intra_lb_routing.size() == cluster_ctx.clb_nlist.blocks().size()); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route = alloc_and_load_pb_route(intra_lb_routing[blk_id], cluster_ctx.clb_nlist.block_pb(blk_id)->pb_graph_node); - } - } +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; IntraLbPbPinLookup pb_graph_pin_lookup_from_index_by_type(device_ctx.logical_block_types); @@ -582,26 +654,26 @@ void output_clustering(const vtr::vector inputs; std::vector outputs; - for (auto blk_id : atom_ctx.nlist.blocks()) { - auto type = atom_ctx.nlist.block_type(blk_id); + for (auto blk_id : atom_nlist.blocks()) { + auto type = atom_nlist.block_type(blk_id); switch (type) { case AtomBlockType::INPAD: if (skip_clustering) { VTR_ASSERT(0); } - inputs.push_back(atom_ctx.nlist.block_name(blk_id)); + inputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::OUTPAD: if (skip_clustering) { VTR_ASSERT(0); } - outputs.push_back(atom_ctx.nlist.block_name(blk_id)); + outputs.push_back(atom_nlist.block_name(blk_id)); break; case AtomBlockType::BLOCK: @@ -612,7 +684,7 @@ void output_clustering(const vtr::vector clocks; - for (auto net_id : 
atom_ctx.nlist.nets()) { + for (auto net_id : atom_nlist.nets()) { if (is_clock.count(net_id)) { - clocks.push_back(atom_ctx.nlist.net_name(net_id)); + clocks.push_back(atom_nlist.net_name(net_id)); } } @@ -631,25 +703,22 @@ void output_clustering(const vtr::vectorpb_route); + if (from_legalizer) { + VTR_ASSERT(cluster_legalizer_ptr != nullptr); + clustering_xml_blocks_from_legalizer(block_node, pb_graph_pin_lookup_from_index_by_type, *cluster_legalizer_ptr); + } else { + VTR_ASSERT(cluster_legalizer_ptr == nullptr); + clustering_xml_blocks_from_netlist(block_node, pb_graph_pin_lookup_from_index_by_type); } } out_xml.save_file(out_fname); - print_stats(); - - if (!intra_lb_routing.empty()) { - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { - cluster_ctx.clb_nlist.block_pb(blk_id)->pb_route.clear(); - } - } + print_stats(cluster_legalizer_ptr, from_legalizer); } /******************************************************************** - * An useful API to output packing results to a XML file + * A useful API to output packing results to a XML file * This function is a wrapper for the function output_clustering() * but remove all the requirements on input data structures that * have to be built with other APIs @@ -660,13 +729,15 @@ void output_clustering(const vtr::vector*> intra_lb_routing_placeholder; std::unordered_set is_clock = alloc_and_load_is_clock(); - output_clustering(intra_lb_routing_placeholder, + // Since the cluster legalizer is not being used to output the clustering + // (from_legalizer is false), passing in nullptr. 
+ output_clustering(nullptr, global_clocks, is_clock, architecture_id, out_fname, - false); + false, /*skip_clustering*/ + false /*from_legalizer*/); } diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 509690e4934..92d734248d1 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -1,12 +1,31 @@ #ifndef OUTPUT_CLUSTERING_H #define OUTPUT_CLUSTERING_H -#include + #include -#include "vpr_types.h" -#include "pack_types.h" +#include + +class AtomNetId; +class ClusterLegalizer; -void output_clustering(const vtr::vector*>& intra_lb_routing, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering); +/// @brief Output the clustering, given by the ClusterLegalizer or a clustered +/// netlist, to a clustered netlist file. +/// +/// The clustering can be output from the following sources: +/// 1) From the clustering +/// 2) From another clustered netlist +/// If from_legalizer is true, the ClusterLegalizer will be used to generate the +/// clustered netlist. If from_legalizer is false, the clustered netlist currently +/// in the global scope will be used. 
+void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, + bool global_clocks, + const std::unordered_set& is_clock, + const std::string& architecture_id, + const char* out_fname, + bool skip_clustering, + bool from_legalizer); -void write_packing_results_to_xml(const bool& global_clocks, const std::string& architecture_id, const char* out_fname); +void write_packing_results_to_xml(const bool& global_clocks, + const std::string& architecture_id, + const char* out_fname); #endif diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index e8c68ea2244..a4a1dcc09ee 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -1,22 +1,17 @@ #include +#include "SetupGrid.h" +#include "cluster.h" +#include "cluster_legalizer.h" +#include "cluster_util.h" +#include "globals.h" +#include "pack.h" +#include "prepack.h" #include "vpr_context.h" -#include "vtr_assert.h" -#include "vtr_log.h" - #include "vpr_error.h" #include "vpr_types.h" - -#include "globals.h" -#include "prepack.h" -#include "pack_types.h" -#include "pack.h" -#include "cluster.h" -#include "SetupGrid.h" -#include "noc_aware_cluster_util.h" - -/* #define DUMP_PB_GRAPH 1 */ -/* #define DUMP_BLIF_INPUT 1 */ +#include "vtr_assert.h" +#include "vtr_log.h" static bool try_size_device_grid(const t_arch& arch, const std::map& num_type_instances, @@ -38,9 +33,7 @@ bool try_pack(t_packer_opts* packer_opts, const t_model* library_models, float interc_delay, std::vector* lb_type_rr_graphs) { - AtomContext& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); const AtomContext& atom_ctx = g_vpr_ctx.atom(); - ClusteringHelperContext& helper_ctx = g_vpr_ctx.mutable_cl_helper(); const DeviceContext& device_ctx = g_vpr_ctx.device(); std::unordered_set is_clock, is_global; @@ -48,8 +41,7 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); /* determine number of models in the architecture */ - helper_ctx.num_models = count_models(user_models); - 
helper_ctx.num_models += count_models(library_models); + size_t num_models = count_models(user_models) + count_models(library_models); is_clock = alloc_and_load_is_clock(); is_global.insert(is_clock.begin(), is_clock.end()); @@ -71,8 +63,11 @@ bool try_pack(t_packer_opts* packer_opts, atom_ctx.nlist.blocks().size(), atom_ctx.nlist.nets().size(), num_p_inputs, num_p_outputs); // Run the prepacker, packing the atoms into molecules. + // The Prepacker object performs prepacking and stores the pack molecules. + // As long as the molecules are used, this object must persist. VTR_LOG("Begin prepacking.\n"); - atom_mutable_ctx.prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); + Prepacker prepacker; + prepacker.init(atom_ctx.nlist, device_ctx.logical_block_types); /* We keep attraction groups off in the first iteration, and * only turn on in later iterations if some floorplan regions turn out to be overfull. @@ -86,11 +81,11 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Using inter-cluster delay: %g\n", packer_opts->inter_cluster_net_delay); } - helper_ctx.target_external_pin_util = t_ext_pin_util_targets(packer_opts->target_external_pin_util); - helper_ctx.high_fanout_thresholds = t_pack_high_fanout_thresholds(packer_opts->high_fanout_threshold); - - VTR_LOG("Packing with pin utilization targets: %s\n", helper_ctx.target_external_pin_util.to_string().c_str()); - VTR_LOG("Packing with high fanout thresholds: %s\n", helper_ctx.high_fanout_thresholds.to_string().c_str()); + // During clustering, a block is related to un-clustered primitives with nets. + // This relation has three types: low fanout, high fanout, and transitive + // high_fanout_thresholds stores the threshold for nets to a block type to + // be considered high fanout. 
+ t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts->high_fanout_threshold); bool allow_unrelated_clustering = false; if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) { @@ -109,30 +104,46 @@ bool try_pack(t_packer_opts* packer_opts, int pack_iteration = 1; bool floorplan_regions_overfull = false; - // find all NoC router atoms - auto noc_atoms = find_noc_router_atoms(); - update_noc_reachability_partitions(noc_atoms); + // Initialize the cluster legalizer. + ClusterLegalizer cluster_legalizer(atom_ctx.nlist, + prepacker, + device_ctx.logical_block_types, + lb_type_rr_graphs, + num_models, + packer_opts->target_external_pin_util, + high_fanout_thresholds, + ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, + packer_opts->enable_pin_feasibility_filter, + packer_opts->feasible_block_array_size, + packer_opts->pack_verbosity); + + VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str()); + VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str()); while (true) { free_clustering_data(*packer_opts, clustering_data); + //Cluster the netlist - helper_ctx.num_used_type_instances = do_clustering( - *packer_opts, - *analysis_opts, - arch, - atom_mutable_ctx.prepacker, - is_clock, - is_global, - allow_unrelated_clustering, - balance_block_type_util, - lb_type_rr_graphs, - attraction_groups, - floorplan_regions_overfull, - clustering_data); + // num_used_type_instances: A map used to save the number of used + // instances from each logical block type. 
+ std::map num_used_type_instances; + num_used_type_instances = do_clustering(*packer_opts, + *analysis_opts, + arch, + prepacker, + cluster_legalizer, + is_clock, + is_global, + allow_unrelated_clustering, + balance_block_type_util, + attraction_groups, + floorplan_regions_overfull, + high_fanout_thresholds, + clustering_data); //Try to size/find a device - bool fits_on_device = try_size_device_grid(*arch, helper_ctx.num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); + bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); /* We use this bool to determine the cause for the clustering not being dense enough. If the clustering * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause @@ -187,7 +198,13 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Pack iteration is %d\n", pack_iteration); attraction_groups.set_att_group_pulls(4); t_ext_pin_util pin_util(1.0, 1.0); - helper_ctx.target_external_pin_util.set_block_pin_util("clb", pin_util); + // TODO: This line assumes the logic block name is "clb" which + // may not be the case. This may need to be investigated. + // Probably we should do this update of ext_pin_util for + // all types that were overused. Or if that is hard, just + // do it for all block types. Doing it only for a clb + // string is dangerous -VB. 
+ cluster_legalizer.get_target_external_pin_util().set_block_pin_util("clb", pin_util); } } else { //Unable to pack densely enough: Give Up @@ -201,8 +218,8 @@ bool try_pack(t_packer_opts* packer_opts, std::string resource_reqs; std::string resource_avail; auto& grid = g_vpr_ctx.device().grid; - for (auto iter = helper_ctx.num_used_type_instances.begin(); iter != helper_ctx.num_used_type_instances.end(); ++iter) { - if (iter != helper_ctx.num_used_type_instances.begin()) { + for (auto iter = num_used_type_instances.begin(); iter != num_used_type_instances.end(); ++iter) { + if (iter != num_used_type_instances.begin()) { resource_reqs += ", "; resource_avail += ", "; } @@ -230,8 +247,8 @@ bool try_pack(t_packer_opts* packer_opts, g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); //attraction_groups.reset_attraction_groups(); - free_cluster_placement_stats(helper_ctx.cluster_placement_stats); - delete[] helper_ctx.primitives_list; + // Reset the cluster legalizer for re-clustering. 
+ cluster_legalizer.reset(); ++pack_iteration; } @@ -252,7 +269,7 @@ bool try_pack(t_packer_opts* packer_opts, /******************** End **************************/ //check clustering and output it - check_and_output_clustering(*packer_opts, is_clock, arch, helper_ctx.total_clb_num, clustering_data.intra_lb_routing); + check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch); // Free Data Structures free_clustering_data(*packer_opts, clustering_data); diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h index 0115d2c859a..842feb0aacd 100644 --- a/vpr/src/pack/pack.h +++ b/vpr/src/pack/pack.h @@ -1,11 +1,15 @@ #ifndef PACK_H #define PACK_H -#include #include -#include "vpr_types.h" +#include class AtomNetId; +struct t_analysis_opts; +struct t_arch; +struct t_lb_type_rr_node; +struct t_model; +struct t_packer_opts; bool try_pack(t_packer_opts* packer_opts, const t_analysis_opts* analysis_opts, diff --git a/vpr/src/pack/post_routing_pb_pin_fixup.cpp b/vpr/src/pack/post_routing_pb_pin_fixup.cpp index 2126c0b7b1a..e8c2c34a126 100644 --- a/vpr/src/pack/post_routing_pb_pin_fixup.cpp +++ b/vpr/src/pack/post_routing_pb_pin_fixup.cpp @@ -107,7 +107,7 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li * Deposit all the sides */ if (wanted_sides.empty()) { - for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { + for (e_side side : TOTAL_2D_SIDES) { wanted_sides.push_back(side); } } diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp deleted file mode 100644 index a1b48d0e083..00000000000 --- a/vpr/src/pack/re_cluster.cpp +++ /dev/null @@ -1,276 +0,0 @@ -#include "re_cluster.h" -#include "re_cluster_util.h" -#include "initial_placement.h" -#include "cluster_placement.h" -#include "cluster_router.h" - -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& helper_ctx = 
g_vpr_ctx.mutable_cl_helper(); - auto& device_ctx = g_vpr_ctx.device(); - - bool is_removed, is_created; - ClusterBlockId old_clb = atom_to_cluster(molecule->atom_block_ids[molecule->root]); - int molecule_size = get_array_size_of_molecule(molecule); - - NocGroupId temp_noc_grp_id = NocGroupId::INVALID(); - PartitionRegion temp_cluster_pr; - t_lb_router_data* old_router_data = nullptr; - t_lb_router_data* router_data = nullptr; - - //Check that there is a place for a new cluster of the same type - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(old_clb); - int block_mode = cluster_ctx.clb_nlist.block_pb(old_clb)->mode; - - unsigned int num_instances = 0; - for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); - } - - if (helper_ctx.num_used_type_instances[block_type] == num_instances) { - VTR_LOGV(verbosity > 4, "The utilization of block_type %s is 100%. No space for new clusters\n", block_type->name); - VTR_LOGV(verbosity > 4, "Atom %d move aborted\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. 
Can't remove it from the old cluster\n", molecule->atom_block_ids[molecule->root]); - return false; - } - - //Create new cluster of the same type and mode. - ClusterBlockId new_clb(helper_ctx.total_clb_num); - is_created = start_new_cluster_for_mol(molecule, - block_type, - block_mode, - helper_ctx.feasible_block_array_size, - helper_ctx.enable_pin_feasibility_filter, - new_clb, - during_packing, - verbosity, - clustering_data, - &router_data, - temp_cluster_pr, - temp_noc_grp_id); - - //Commit or revert the move - if (is_created) { - commit_mol_move(old_clb, new_clb, during_packing, true); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_created && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_created); -} - -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - //define local variables - bool is_removed, is_added; - AtomBlockId root_atom_id = molecule->atom_block_ids[molecule->root]; - int molecule_size = get_array_size_of_molecule(molecule); - t_lb_router_data* old_router_data = nullptr; - std::unordered_set& new_clb_atoms = cluster_to_mutable_atoms(new_clb); - ClusterBlockId old_clb = atom_to_cluster(root_atom_id); - - //check old and new clusters compatibility - bool is_compatible = check_type_and_mode_compatibility(old_clb, new_clb, verbosity); - if (!is_compatible) - return false; - - 
//remove the molecule from its current cluster - std::unordered_set& old_clb_atoms = cluster_to_mutable_atoms(old_clb); - if (old_clb_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. This is the last atom in its cluster.\n"); - return false; - } - remove_mol_from_cluster(molecule, molecule_size, old_clb, old_clb_atoms, false, old_router_data); - - //check old cluster legality after removing the molecule - is_removed = is_cluster_legal(old_router_data); - - //if the cluster is legal, commit the molecule removal. Otherwise, abort the move - if (is_removed) { - commit_mol_removal(molecule, molecule_size, old_clb, during_packing, old_router_data, clustering_data); - } else { - VTR_LOGV(verbosity > 4, "Atom: %zu move failed. Can't remove it from the old cluster\n", root_atom_id); - return false; - } - - //Add the atom to the new cluster - t_lb_router_data* new_router_data = nullptr; - is_added = pack_mol_in_existing_cluster(molecule, molecule_size, new_clb, new_clb_atoms, during_packing, clustering_data, new_router_data); - - //Commit or revert the move - if (is_added) { - commit_mol_move(old_clb, new_clb, during_packing, false); - VTR_LOGV(verbosity > 4, "Atom:%zu is moved to a new cluster\n", molecule->atom_block_ids[molecule->root]); - } else { - revert_mol_move(old_clb, molecule, old_router_data, during_packing, clustering_data); - VTR_LOGV(verbosity > 4, "Atom:%zu move failed. 
Can't start a new cluster of the same type and mode\n", molecule->atom_block_ids[molecule->root]); - } - - free_router_data(old_router_data); - old_router_data = nullptr; - - //If the move is done after packing not during it, some fixes need to be done on the - //clustered netlist - if (is_added && !during_packing) { - fix_clustered_netlist(molecule, molecule_size, old_clb, new_clb); - } - - return (is_added); -} - -#if 1 -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - //define local variables - PartitionRegion temp_cluster_pr_1, temp_cluster_pr_2; - - bool mol_1_success, mol_2_success; - - AtomBlockId root_1_atom_id = molecule_1->atom_block_ids[molecule_1->root]; - AtomBlockId root_2_atom_id = molecule_2->atom_block_ids[molecule_2->root]; - - int molecule_1_size = get_array_size_of_molecule(molecule_1); - int molecule_2_size = get_array_size_of_molecule(molecule_2); - - //Check that the 2 clusters are the same type - ClusterBlockId clb_1 = atom_to_cluster(root_1_atom_id); - ClusterBlockId clb_2 = atom_to_cluster(root_2_atom_id); - - if (clb_1 == clb_2) { - VTR_LOGV(verbosity > 4, "Swap failed. Both atoms are already in the same cluster.\n"); - return false; - } - //Check that the old and new clusters are of the same type - bool is_compitable = check_type_and_mode_compatibility(clb_1, clb_2, verbosity); - if (!is_compitable) - return false; - - t_lb_router_data* old_1_router_data = nullptr; - t_lb_router_data* old_2_router_data = nullptr; - - //save the atoms of the 2 clusters - std::unordered_set& clb_1_atoms = cluster_to_mutable_atoms(clb_1); - std::unordered_set& clb_2_atoms = cluster_to_mutable_atoms(clb_2); - - if (clb_1_atoms.size() == 1 || clb_2_atoms.size() == 1) { - VTR_LOGV(verbosity > 4, "Atom: %zu, %zu swap failed. 
This is the last atom in its cluster.\n", - molecule_1->atom_block_ids[molecule_1->root], - molecule_2->atom_block_ids[molecule_2->root]); - return false; - } - - t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1); - std::string clb_pb_1_name = static_cast(clb_pb_1->name); - t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2); - std::string clb_pb_2_name = static_cast(clb_pb_2->name); - - //remove the molecule from its current cluster - remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_1, during_packing, old_1_router_data, clustering_data); - - remove_mol_from_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, false, old_2_router_data); - commit_mol_removal(molecule_2, molecule_2_size, clb_2, during_packing, old_2_router_data, clustering_data); - - //Add the atom to the new cluster - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - if (!mol_1_success) { - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - if (!mol_2_success) { - 
remove_mol_from_cluster(molecule_1, molecule_1_size, clb_2, clb_2_atoms, true, old_2_router_data); - commit_mol_removal(molecule_1, molecule_1_size, clb_2, during_packing, old_2_router_data, clustering_data); - mol_1_success = pack_mol_in_existing_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, during_packing, clustering_data, old_1_router_data); - mol_2_success = pack_mol_in_existing_cluster(molecule_2, molecule_2_size, clb_2, clb_2_atoms, during_packing, clustering_data, old_2_router_data); - - VTR_ASSERT(mol_1_success && mol_2_success); - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return false; - } - - //commit the move if succeeded or revert if failed - VTR_ASSERT(mol_1_success && mol_2_success); - - //If the move is done after packing not during it, some fixes need to be done on the clustered netlist - if (!during_packing) { - fix_clustered_netlist(molecule_1, molecule_1_size, clb_1, clb_2); - fix_clustered_netlist(molecule_2, molecule_2_size, clb_2, clb_1); - } - - free_router_data(old_1_router_data); - free_router_data(old_2_router_data); - old_1_router_data = nullptr; - old_2_router_data = nullptr; - - free(clb_pb_1->name); - cluster_ctx.clb_nlist.block_pb(clb_1)->name = vtr::strdup(clb_pb_1_name.c_str()); - free(clb_pb_2->name); - cluster_ctx.clb_nlist.block_pb(clb_2)->name = vtr::strdup(clb_pb_2_name.c_str()); - - return true; -} -#endif diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h deleted file mode 100644 index 5ca2489aac4..00000000000 --- a/vpr/src/pack/re_cluster.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef RE_CLUSTER_H -#define RE_CLUSTER_H -/** - * @file This file includes an API function that updates clustering 
after its done - * - * To optimize the clustering decisions, this file provides an API that can open up already - * packed clusters and change them. The functions in this API can be used in 2 locations: - * - During packing after the clusterer is done - * - During placement after the initial placement is done - * - */ - -#include "pack_types.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" - -/** - * @brief This function moves a molecule out of its cluster and creates a new cluster for it - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_new_cluster(t_pack_molecule* molecule, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function moves a molecule out of its cluster to another cluster that already exists. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. - */ -bool move_mol_to_existing_cluster(t_pack_molecule* molecule, - const ClusterBlockId& new_clb, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); - -/** - * @brief This function swap two molecules between two different clusters. - * - * This function can be called from 2 spots in the vpr flow. - * - First, during packing to optimize the initial clustered netlist - * (during_packing variable should be true.) - * - Second, during placement (during_packing variable should be false). In this case, the clustered - * netlist is updated. 
- */ -bool swap_two_molecules(t_pack_molecule* molecule_1, - t_pack_molecule* molecule_2, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data); -#endif diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp deleted file mode 100644 index 46eb04955a5..00000000000 --- a/vpr/src/pack/re_cluster_util.cpp +++ /dev/null @@ -1,764 +0,0 @@ -#include "re_cluster_util.h" -#include "clustered_netlist_utils.h" -#include "cluster_util.h" -#include "cluster_router.h" -#include "cluster_placement.h" -#include "place_macro.h" -#include "initial_placement.h" -#include "read_netlist.h" - -// The name suffix of the new block (if exists) -// This suffix is useful in preventing duplicate high-level cluster block names -const char* name_suffix = "_m"; - -/******************* Static Functions ********************/ -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); -static void fix_atom_pin_mapping(const AtomBlockId blk); - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); - -static std::pair check_net_absorption(AtomNetId atom_net_id, - ClusterBlockId new_clb, - ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id); - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added); - -/***************** API functions ***********************/ -ClusterBlockId atom_to_cluster(AtomBlockId atom) { - auto& atom_ctx = g_vpr_ctx.atom(); - return (atom_ctx.lookup.atom_clb(atom)); -} - 
-const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster) { - const auto& atoms = cluster_to_mutable_atoms(cluster); - - return atoms; -} - -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - auto it = old_clb_atoms.find(molecule->atom_block_ids[i_atom]); - if (it != old_clb_atoms.end()) - old_clb_atoms.erase(molecule->atom_block_ids[i_atom]); - } - } - - //re-build router_data structure for this cluster - if (!router_data_ready) - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, old_clb, old_clb_atoms); - - update_cluster_pb_stats(molecule, molecule_size, old_clb, false); -} - -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created) { - auto& device_ctx = g_vpr_ctx.device(); - - //place the new cluster if this function called during placement (after the initial placement is done) - if (!during_packing && new_clb_created) { - int imacro; - g_vpr_ctx.mutable_placement().mutable_block_locs().resize(g_vpr_ctx.placement().block_locs().size() + 1); - get_imacro_from_iblk(&imacro, old_clb, g_vpr_ctx.placement().pl_macros); - set_imacro_for_iblk(&imacro, new_clb); - place_one_block(new_clb, device_ctx.pad_loc_type, nullptr, nullptr, g_vpr_ctx.mutable_placement().mutable_blk_loc_registry()); - } -} - -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - //build data structures used by intra-logic block router - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); - t_lb_router_data* router_data = 
alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); - - for (auto atom_id : clb_atoms) { - add_atom_as_target(router_data, atom_id); - const t_pb* pb = atom_ctx.lookup.atom_pb(atom_id); - while (pb) { - set_reset_pb_modes(router_data, pb, true); - pb = pb->parent_pb; - } - } - return (router_data); -} - -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - const t_logical_block_type_ptr type, - const int mode, - const int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage, - int force_site) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - /* Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints */ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); - - /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); - //const t_model* root_model = atom_ctx.nlist.block_model(root_atom); - - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&(helper_ctx.lb_type_rr_graphs[type->index]), type); - - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - pb->mode = mode; - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[type->index]); - 
reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(pb->pb_graph_node, mode); - - pack_result = try_pack_molecule(cluster_placement_stats, - molecule, - helper_ctx.primitives_list, - pb, - helper_ctx.num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, - *router_data, - 0, - enable_pin_feasibility_filter, - 0, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - VTR_LOGV(verbosity > 4, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - std::string new_name = root_atom_name + name_suffix; - pb->name = vtr::strdup(new_name.c_str()); - clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); - helper_ctx.total_clb_num++; - int molecule_size = get_array_size_of_molecule(molecule); - update_cluster_pb_stats(molecule, molecule_size, clb_index, true); - - // Update the clb-->atoms lookup table - helper_ctx.atoms_lookup.resize(helper_ctx.total_clb_num); - for (int i_atom = 0; i_atom < molecule_size; ++i_atom) { - if (molecule->atom_block_ids[i_atom]) { - helper_ctx.atoms_lookup[clb_index].insert(molecule->atom_block_ids[i_atom]); - } - } - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. 
- if (during_packing) { - clustering_data.intra_lb_routing.push_back((*router_data)->saved_lb_nets); - (*router_data)->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route = alloc_and_load_pb_route((*router_data)->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node); - } - } else { - free_pb(pb); - delete pb; - } - - //Free failed clustering - free_router_data(*router_data); - *router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage, - bool enable_pin_feasibility_filter, - int force_site) { - - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id; - PartitionRegion temp_cluster_pr; - e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED; - t_ext_pin_util target_ext_pin_util = helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(new_clb)->name); - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(new_clb); - t_pb* temp_pb = cluster_ctx.clb_nlist.block_pb(new_clb); - - //re-build cluster placement stats - rebuild_cluster_placement_stats(new_clb, new_clb_atoms); - if (!check_free_primitives_for_molecule_atoms(molecule, &(helper_ctx.cluster_placement_stats[block_type->index]))) - return false; - - //re-build router_data structure for this cluster - router_data = lb_load_router_data(helper_ctx.lb_type_rr_graphs, new_clb, new_clb_atoms); - - pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[block_type->index]), - molecule, - helper_ctx.primitives_list, - temp_pb, - helper_ctx.num_models, - 
helper_ctx.max_cluster_size, - new_clb, - detailed_routing_stage, - router_data, - 0, - enable_pin_feasibility_filter, - //false, - helper_ctx.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr, - temp_cluster_noc_grp_id, - force_site); - - // If clustering succeeds, add it to the clb netlist - if (pack_result == e_block_pack_status::BLK_PASSED) { - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[new_clb]); - clustering_data.intra_lb_routing[new_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(new_clb)->pb_graph_node); - } - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - new_clb_atoms.insert(molecule->atom_block_ids[i_atom]); - } - } - update_cluster_pb_stats(molecule, molecule_size, new_clb, true); - } - - //Free clustering router data - free_router_data(router_data); - router_data = nullptr; - - return (pack_result == e_block_pack_status::BLK_PASSED); -} - -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - fix_cluster_port_after_moving(new_clb); - fix_cluster_net_after_moving(molecule, molecule_size, old_clb, new_clb); -} - -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - NocGroupId temp_cluster_noc_grp_id_original; - PartitionRegion temp_cluster_pr_original; - e_block_pack_status pack_result = 
try_pack_molecule(&(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(old_clb)->index]), - molecule, - helper_ctx.primitives_list, - cluster_ctx.clb_nlist.block_pb(old_clb), - helper_ctx.num_models, - helper_ctx.max_cluster_size, - old_clb, - E_DETAILED_ROUTE_FOR_EACH_ATOM, - old_router_data, - 0, - helper_ctx.enable_pin_feasibility_filter, - helper_ctx.feasible_block_array_size, - helper_ctx.target_external_pin_util.get_pin_util(cluster_ctx.clb_nlist.block_type(old_clb)->name), - temp_cluster_pr_original, - temp_cluster_noc_grp_id_original); - - VTR_ASSERT(pack_result == e_block_pack_status::BLK_PASSED); - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = old_router_data->saved_lb_nets; - old_router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(old_router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } - - free_router_data(old_router_data); - old_router_data = nullptr; -} -/*******************************************/ -/************ static functions *************/ -/*******************************************/ - -static void fix_cluster_net_after_moving(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - AtomNetId atom_net_id; - ClusterPinId cluster_pin; - - //remove all old cluster pin from their nets - ClusterNetId cur_clb_net; - for (auto& old_clb_pin : cluster_ctx.clb_nlist.block_pins(old_clb)) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(old_clb_pin); - cluster_ctx.clb_nlist.remove_net_pin(cur_clb_net, old_clb_pin); - } - - //delete 
cluster nets that are no longer used - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - for (auto atom_pin : atom_ctx.nlist.block_pins(molecule->atom_block_ids[i_atom])) { - atom_net_id = atom_ctx.nlist.pin_net(atom_pin); - auto [previously_absorbed, now_absorbed] = check_net_absorption(atom_net_id, new_clb, old_clb, cluster_pin); - - if (!previously_absorbed && now_absorbed) { - cur_clb_net = cluster_ctx.clb_nlist.pin_net(cluster_pin); - cluster_ctx.clb_nlist.remove_net(cur_clb_net); - } - } - } - } - - //Fix cluster pin for old and new clbs - fix_cluster_pins_after_moving(old_clb); - fix_cluster_pins_after_moving(new_clb); - - for (AtomBlockId atom_blk : cluster_to_atoms(old_clb)) - fix_atom_pin_mapping(atom_blk); - - for (AtomBlockId atom_blk : cluster_to_atoms(new_clb)) - fix_atom_pin_mapping(atom_blk); - - cluster_ctx.clb_nlist.remove_and_compress(); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(old_clb), cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route); - load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(new_clb), cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route); -} - -static void fix_cluster_port_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - - while (!pb->is_root()) { - pb = pb->parent_pb; - } - - size_t num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); - const t_pb_type* pb_type = pb->pb_graph_node->pb_type; - - for (size_t port = num_old_ports; port < (unsigned)pb_type->num_ports; port++) { - if (pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::CLOCK); - } else if (!pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { - cluster_ctx.clb_nlist.create_port(clb_index, 
pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::INPUT); - } else { - VTR_ASSERT(pb_type->ports[port].type == OUT_PORT); - cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::OUTPUT); - } - } - - num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); -} - -static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); - t_pb_graph_pin* pb_graph_pin; - AtomNetId atom_net_id; - ClusterNetId clb_net_id; - - t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(clb_index); - - int num_input_ports = pb->pb_graph_node->num_input_ports; - int num_output_ports = pb->pb_graph_node->num_output_ports; - int num_clock_ports = pb->pb_graph_node->num_clock_ports; - - int iport, ipb_pin, ipin, rr_node_index; - - ipin = 0; - // iterating over input ports - for (iport = 0; iport < num_input_ports; iport++) { - ClusterPortId input_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[iport].name); - // iterating over physical block pins of each input port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_input_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->input_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(input_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(input_port_id, (BitIndex)ipb_pin, clb_net_id, 
PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over output ports - for (iport = 0; iport < num_output_ports; iport++) { - ClusterPortId output_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[num_input_ports + iport].name); - // iterating over physical block pins of each output port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_output_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->output_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(output_port_id, (BitIndex)ipb_pin); - AtomPinId atom_net_driver = atom_ctx.nlist.net_driver(atom_net_id); - bool driver_is_constant = atom_ctx.nlist.pin_is_constant(atom_net_driver); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(output_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::DRIVER, ipin, driver_is_constant); - else { - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::DRIVER, clb_net_id); - cluster_ctx.clb_nlist.set_pin_is_constant(cur_pin_id, driver_is_constant); - } - VTR_ASSERT(cluster_ctx.clb_nlist.net_is_constant(clb_net_id) == driver_is_constant); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } - - // iterating over clock ports - for (iport = 0; iport < num_clock_ports; iport++) { - ClusterPortId clock_port_id = cluster_ctx.clb_nlist.find_port(clb_index, 
block_type->pb_type->ports[num_input_ports + num_output_ports + iport].name); - // iterating over physical block pins of each clock port - for (ipb_pin = 0; ipb_pin < pb->pb_graph_node->num_clock_pins[iport]; ipb_pin++) { - pb_graph_pin = &pb->pb_graph_node->clock_pins[iport][ipb_pin]; - rr_node_index = pb_graph_pin->pin_count_in_cluster; - - VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); - if (pb->pb_route.count(rr_node_index)) { - atom_net_id = pb->pb_route[rr_node_index].atom_net_id; - if (atom_net_id) { - clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); - atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); - ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(clock_port_id, (BitIndex)ipb_pin); - if (!cur_pin_id) - cluster_ctx.clb_nlist.create_pin(clock_port_id, (BitIndex)ipb_pin, clb_net_id, PinType::SINK, ipin); - else - cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); - } - cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; - } - ipin++; - } - } -} - -static std::pair check_net_absorption(const AtomNetId atom_net_id, - const ClusterBlockId new_clb, - const ClusterBlockId old_clb, - ClusterPinId& cluster_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - AtomBlockId atom_block_id; - ClusterBlockId clb_index; - - ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); - - bool previously_absorbed; - if (clb_net_id == ClusterNetId::INVALID()) - previously_absorbed = true; - else { - previously_absorbed = false; - for (auto& cluster_pin : cluster_ctx.clb_nlist.net_pins(clb_net_id)) { - if (cluster_pin && cluster_ctx.clb_nlist.pin_block(cluster_pin) == old_clb) { - cluster_pin_id = cluster_pin; - break; - } - } - } - - //iterate over net pins and check their cluster - bool now_absorbed = true; - for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net_id)) { - atom_block_id = 
atom_ctx.nlist.pin_block(net_pin); - clb_index = atom_ctx.lookup.atom_clb(atom_block_id); - - if (clb_index != new_clb) { - now_absorbed = false; - break; - } - } - - return {previously_absorbed, now_absorbed}; -} - -static void fix_atom_pin_mapping(const AtomBlockId blk) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - const t_pb* pb = atom_ctx.lookup.atom_pb(blk); - VTR_ASSERT_MSG(pb, "Atom block must have a matching PB"); - - const t_pb_graph_node* gnode = pb->pb_graph_node; - VTR_ASSERT_MSG(gnode->pb_type->model == atom_ctx.nlist.block_model(blk), - "Atom block PB must match BLIF model"); - - for (int iport = 0; iport < gnode->num_input_ports; ++iport) { - if (gnode->num_input_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->input_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_input_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->input_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_output_ports; ++iport) { - if (gnode->num_output_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->output_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_output_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->output_pins[iport][ipin]; - VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } - - for (int iport = 0; iport < gnode->num_clock_ports; ++iport) { - if (gnode->num_clock_pins[iport] <= 0) continue; - - const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->clock_pins[iport][0].port->model_port); - if (!port) continue; - - for (int ipin = 0; ipin < gnode->num_clock_pins[iport]; ++ipin) { - const t_pb_graph_pin* gpin = &gnode->clock_pins[iport][ipin]; - 
VTR_ASSERT(gpin); - - set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); - } - } -} - -static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route) { - int num_pins = type->pb_graph_head->total_pb_pins; - - for (int ipb_pin = 0; ipb_pin < num_pins; ipb_pin++) { - if (!pb_route.count(ipb_pin)) continue; - - if (pb_route[ipb_pin].driver_pb_pin_id != OPEN) { - load_atom_index_for_pb_pin(pb_route, ipb_pin); - } - } -} - -static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin) { - int driver = pb_route[ipin].driver_pb_pin_id; - - VTR_ASSERT(driver != OPEN); - //VTR_ASSERT(!pb_route[ipin].atom_net_id); - - if (!pb_route[driver].atom_net_id) { - load_atom_index_for_pb_pin(pb_route, driver); - } - - //Store the net coming from the driver - pb_route[ipin].atom_net_id = pb_route[driver].atom_net_id; - - //Store ourselves with the driver - pb_route[driver].sink_pb_pin_ids.push_back(ipin); -} - -#if 0 -static bool count_children_pbs(const t_pb* pb) { - if (pb == nullptr) - return 0; - - for (int i = 0; i < pb->get_num_child_types(); i++) { - for (int j = 0; j < pb->get_num_children_of_type(i); j++) { - if (pb->child_pbs[i] != nullptr && pb->child_pbs[i][j].name != nullptr) { - return true; - } - } - } - return false; -} -#endif - -static void rebuild_cluster_placement_stats(ClusterBlockId clb_index, - const std::unordered_set& clb_atoms) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - - t_cluster_placement_stats* cluster_placement_stats = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); - reset_cluster_placement_stats(cluster_placement_stats); - set_mode_cluster_placement_stats(cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node, cluster_ctx.clb_nlist.block_pb(clb_index)->mode); - - for (AtomBlockId atom : clb_atoms) { - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(atom); 
- commit_primitive(cluster_placement_stats, atom_pb->pb_graph_node); - } -} - -bool is_cluster_legal(t_lb_router_data*& router_data) { - return (check_cluster_legality(0, E_DETAILED_ROUTE_AT_END_ONLY, router_data)); -} - -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - for (int i_atom = 0; i_atom < molecule_size; i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - revert_place_atom_block(molecule->atom_block_ids[i_atom], router_data); - } - } - - cleanup_pb(cluster_ctx.clb_nlist.block_pb(old_clb)); - - //If you are still in packing, update the clustering data. Otherwise, update the clustered netlist. - if (during_packing) { - free_intra_lb_nets(clustering_data.intra_lb_routing[old_clb]); - clustering_data.intra_lb_routing[old_clb] = router_data->saved_lb_nets; - router_data->saved_lb_nets = nullptr; - } else { - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, - cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); - } -} - -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - //Check that the old and new clusters are the same type - if (cluster_ctx.clb_nlist.block_type(old_clb) != cluster_ctx.clb_nlist.block_type(new_clb)) { - VTR_LOGV(verbosity > 4, "Move aborted. New and old cluster blocks are not of the same type"); - return false; - } - - //Check that the old and new clusters are the mode - if (cluster_ctx.clb_nlist.block_pb(old_clb)->mode != cluster_ctx.clb_nlist.block_pb(new_clb)->mode) { - VTR_LOGV(verbosity > 4, "Move aborted. 
New and old cluster blocks are not of the same mode"); - return false; - } - - return true; -} - -static void update_cluster_pb_stats(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId clb_index, - bool is_added) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - t_pb* cur_pb; - - for (int iblock = 0; iblock < molecule_size; iblock++) { - auto blk_id = molecule->atom_block_ids[iblock]; - if (!blk_id) { - continue; - } - - //Update atom netlist mapping - if (is_added) { - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - } else { - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - } - - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(atom_pb); - - cur_pb = atom_pb->parent_pb; - - while (cur_pb) { - /* reset list of feasible blocks */ - cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - if (is_added) - cur_pb->pb_stats->num_child_blocks_in_pb++; - else - cur_pb->pb_stats->num_child_blocks_in_pb--; - - cur_pb = cur_pb->parent_pb; - } - } -} - -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster) { - auto& helper_ctx = g_vpr_ctx.mutable_cl_helper(); - - //If the lookup is not built yet, build it first - if (helper_ctx.atoms_lookup.empty()) - init_clb_atoms_lookup(helper_ctx.atoms_lookup); - - return helper_ctx.atoms_lookup[cluster]; -} \ No newline at end of file diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h deleted file mode 100644 index bc940dca881..00000000000 --- a/vpr/src/pack/re_cluster_util.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef RE_CLUSTER_UTIL_H -#define RE_CLUSTER_UTIL_H - -#include "clustered_netlist_fwd.h" -#include "clustered_netlist_utils.h" -#include "atom_netlist_fwd.h" -#include "globals.h" -#include "pack_types.h" -#include "cluster_util.h" -/** - * @file - * @brief This files defines some helper functions for the re-clustering API - * - * Re-clustering API is used to move atoms between clusters after the cluster is done. 
- * This can be very used in iteratively improve the packed solution after the initial clustering is done. - * It can also be used during placement to allow fine-grained moves that can move a BLE or a single FF/LUT. - * - * Note: Some of the helper functions defined here might be useful in different places in VPR. - */ - -/** - * @brief A function that returns the block ID in the clustered netlist - * from its ID in the atom netlist. - */ -ClusterBlockId atom_to_cluster(AtomBlockId atom); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is immutable. - */ -const std::unordered_set& cluster_to_atoms(ClusterBlockId cluster); - -/** - * @brief A function that return a list of atoms in a cluster - * @note This function can be called only after cluster/packing is done or - * the clustered netlist is created. - * @return Atoms in the given cluster. The returned set is mutable. - */ -std::unordered_set& cluster_to_mutable_atoms(ClusterBlockId cluster); - -/** - * @brief A function that loads the intra-cluster router data of one cluster - */ -t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, - ClusterBlockId clb_index, - const std::unordered_set& clb_atoms); - -/** - * @brief A function that removes a molecule from a cluster and checks legality of - * the old cluster. - * - * It returns true if the removal is done and the old cluster is legal. - * It aborts the removal and returns false if the removal will make the old cluster - * illegal. - * - * This function updates the intra-logic block router data structure (router_data) and - * remove all the atoms of the molecule from old_clb_atoms vector. - * - * @param old_clb: The original cluster of this molecule - * @param old_clb_atoms: A vector containing the list of atoms in the old cluster of the molecule. 
- * It will be updated in the function to remove the atoms of molecule from it. - * @param router_data: returns the intra logic block router data. - */ -void remove_mol_from_cluster(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId& old_clb, - std::unordered_set& old_clb_atoms, - bool router_data_ready, - t_lb_router_data*& router_data); - -/** - * @brief A function that starts a new cluster for one specific molecule - * - * It places the molecule in a specific type and mode that should be passed by - * the higher level routine. - * - * @param type: the cluster block type needed - * @param mode: the mode of the new cluster - * @param clb_index: the cluster block Id of the newly created cluster block - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_pr: returns the partition region of the new cluster. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. - * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). 
- * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool start_new_cluster_for_mol(t_pack_molecule* molecule, - t_logical_block_type_ptr type, - int mode, - int feasible_block_array_size, - bool enable_pin_feasibility_filter, - ClusterBlockId clb_index, - bool during_packing, - int verbosity, - t_clustering_data& clustering_data, - t_lb_router_data** router_data, - PartitionRegion& temp_cluster_pr, - NocGroupId& temp_cluster_noc_grp_id, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - int force_site = -1); - -/** - * @brief A function that packs a molecule into an existing cluster - * - * @param clb_index: the cluster block Id of the new cluster that we need to pack the molecule in. - * @param: clb_atoms: A vector containing the list of atoms in the new cluster block before adding the molecule. - * @param during_packing: true if this function is called during packing, false if it is called during placement - * @param is_swap: true if this function is called during swapping two molecules. False if the called during a single molecule move - * @param clustering_data: A data structure containing helper data for the clustering process - * (is updated if this function is called during packing, especially intra_lb_routing data member). - * @param router_data: returns the intra logic block router data. - * @param temp_cluster_noc_grp_id returns the NoC group ID of the new cluster - * @param detailed_routing_stage: options are E_DETAILED_ROUTE_FOR_EACH_ATOM (default) and E_DETAILED_ROUTE_AT_END_ONLY. 
- * This argument specifies whether or not to run an intra-cluster routing-based legality - * check after adding the molecule to the cluster; default is the more conservative option. - * This argument is passed down to try_pack_mol; if E_DETAILED_ROUTE_AT_END_ONLY is passed, - * the function does not run a detailed intra-cluster routing-based legality check. - * If many molecules will be added to a cluster, this option enables use of a single - * routing check on the completed cluster (vs many incremental checks). - * @param enable_pin_feasibility_filter: do a pin couting based legality check (before or in place of intra-cluster routing check). - * @param force_site: optional user-specified primitive site on which to place the molecule; this is passed to - * try_pack_molecule and then to get_next_primitive_site. If a force_site argument is provided, - * the molecule is either placed on the specified site or fails to add to the cluster. - * If the force_site argument is set to its default value (-1), vpr selects an available site. - */ -bool pack_mol_in_existing_cluster(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId new_clb, - std::unordered_set& new_clb_atoms, - bool during_packing, - t_clustering_data& clustering_data, - t_lb_router_data*& router_data, - enum e_detailed_routing_stages detailed_routing_stage = E_DETAILED_ROUTE_FOR_EACH_ATOM, - bool enable_pin_feasibility_filter = true, - int force_site = -1); - -/** - * @brief A function that fix the clustered netlist if the move is performed - * after the packing is done and clustered netlist is built - * - * If you are changing clustering after packing is done, you need to update the clustered netlist by - * deleting the newly absorbed nets and creating nets for the atom nets that become unabsorbed. It also - * fixes the cluster ports for both the old and new clusters. 
- */ -void fix_clustered_netlist(t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - ClusterBlockId new_clb); - -/** - * @brief A function that commits the molecule move if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - */ -void commit_mol_move(ClusterBlockId old_clb, - ClusterBlockId new_clb, - bool during_packing, - bool new_clb_created); - -/** - * @brief A function that reverts the molecule move if it is illegal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. move_mol_to_new_cluster) - * @params - */ -void revert_mol_move(ClusterBlockId old_clb, - t_pack_molecule* molecule, - t_lb_router_data*& old_router_data, - bool during_packing, - t_clustering_data& clustering_data); - -/** - * @brief A function that checks the legality of a cluster by running the intra-cluster routing - */ -bool is_cluster_legal(t_lb_router_data*& router_data); - -/** - * @brief A function that commits the molecule removal if it is legal - * - * @params during_packing: true if this function is called during packing, false if it is called during placement - * @params new_clb_created: true if the move is creating a new cluster (e.g. 
move_mol_to_new_cluster) - */ -void commit_mol_removal(const t_pack_molecule* molecule, - int molecule_size, - ClusterBlockId old_clb, - bool during_packing, - t_lb_router_data*& router_data, - t_clustering_data& clustering_data); - -/** - * @brief A function that check that two clusters are of the same type and in the same mode of operation - */ -bool check_type_and_mode_compatibility(ClusterBlockId old_clb, - ClusterBlockId new_clb, - int verbosity); - -#endif diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 2cd55402d47..c61db2ee0fc 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1589,27 +1589,21 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, bool cube_bb; const int number_layers = g_vpr_ctx.device().grid.get_num_layers(); - // If the FPGA has only one layer, then we can only use cube bounding box - if (number_layers == 1) { - cube_bb = true; - } else { - VTR_ASSERT(number_layers > 1); - if (place_bb_mode == AUTO_BB) { - // If the auto_bb is used, we analyze the RR graph to see whether is there any inter-layer connection that is not - // originated from OPIN. If there is any, cube BB is chosen, otherwise, per-layer bb is chosen. - if (inter_layer_connections_limited_to_opin(rr_graph)) { - cube_bb = false; - } else { - cube_bb = true; - } - } else if (place_bb_mode == CUBE_BB) { - // The user has specifically asked for CUBE_BB - cube_bb = true; - } else { - // The user has specifically asked for PER_LAYER_BB - VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB); + if (place_bb_mode == AUTO_BB) { + // If AUTO_BB is used, we analyze the RR graph to see whether there is any inter-layer connection that does not + // originate from an OPIN. If there is one, or the device has a single layer, cube BB is chosen; otherwise, per-layer BB is chosen. 
+ if (number_layers > 1 && inter_layer_connections_limited_to_opin(rr_graph)) { cube_bb = false; + } else { + cube_bb = true; } + } else if (place_bb_mode == CUBE_BB) { + // The user has specifically asked for CUBE_BB + cube_bb = true; + } else { + // The user has specifically asked for PER_LAYER_BB + VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB); + cube_bb = false; } return cube_bb; @@ -2074,14 +2068,15 @@ static int check_block_placement_consistency(const BlkLocRegistry& blk_loc_regis || !is_sub_tile_compatible(physical_tile, logical_block, loc.sub_tile)) { VTR_LOG_ERROR( - "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n", + "Block %zu's location is (%d,%d,%d,%d) but found in grid at (%d,%d,%d,%d).\n", size_t(bnum), loc.x, loc.y, loc.sub_tile, - tile_loc.x, - tile_loc.y, - tile_loc.layer_num, + loc.layer, + i, + j, + k, layer_num); error++; } diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index 22b15f5a04f..94af4721026 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -11,7 +11,7 @@ #include "globals.h" #include "place_constraints.h" #include "place_util.h" -#include "re_cluster_util.h" +#include "vpr_context.h" int check_placement_floorplanning(const vtr::vector_map& block_locs) { int error = 0; @@ -221,12 +221,12 @@ bool cluster_floorplanning_legal(ClusterBlockId blk_id, const t_pl_loc& loc) { void load_cluster_constraints() { auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size()); for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) { - const std::unordered_set& atoms = cluster_to_atoms(cluster_id); + const std::unordered_set& atoms = cluster_ctx.atoms_lookup[cluster_id]; PartitionRegion empty_pr; floorplanning_ctx.cluster_constraints[cluster_id] = 
empty_pr; diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index ea21d581273..aa6896a62fe 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -30,34 +30,27 @@ float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin int delta_x = std::abs(from_loc.x - to_loc.x); int delta_y = std::abs(from_loc.y - to_loc.y); - // TODO: This is compatible with the case that only OPINs are connected to other layers. - // Ideally, I should check whether OPINs are conneced or IPINs and use the correct layer. - // If both are connected, minimum should be taken. In the case that channels are also connected, - // I haven't thought about what to do. - float cross_layer_td = 0; - if (from_loc.layer_num != to_loc.layer_num) { - VTR_ASSERT(std::isfinite(cross_layer_delay_)); - cross_layer_td = cross_layer_delay_; - } - return delays_[to_loc.layer_num][delta_x][delta_y] + cross_layer_td; + return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; } void DeltaDelayModel::dump_echo(std::string filepath) const { FILE* f = vtr::fopen(filepath.c_str(), "w"); fprintf(f, " "); - for (size_t layer_num = 0; layer_num < delays_.dim_size(0); ++layer_num) { - fprintf(f, " %9zu", layer_num); - fprintf(f, "\n"); - for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(2); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { - fprintf(f, " %9.2e", delays_[layer_num][dx][dy]); + for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { + fprintf(f, " %9zu", from_layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9zu", dx); } fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { 
+ fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); + } + fprintf(f, "\n"); + } } } vtr::fclose(f); @@ -241,7 +234,7 @@ void DeltaDelayModel::read(const std::string& file) { // // The second argument should be of type Matrix::Reader where X is the // capnproto element type. - ToNdMatrix<3, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); + ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); } void DeltaDelayModel::write(const std::string& file) const { @@ -257,7 +250,7 @@ void DeltaDelayModel::write(const std::string& file) const { // Matrix message. It is the mirror function of ToNdMatrix described in // read above. auto delay_values = model.getDelays(); - FromNdMatrix<3, VprFloatEntry, float>(&delay_values, delays_, FromFloat); + FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); // writeMessageToFile writes message to the specified file. 
writeMessageToFile(file, &builder); @@ -270,9 +263,9 @@ void OverrideDelayModel::read(const std::string& file) { ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - vtr::NdMatrix delays; + vtr::NdMatrix delays; auto model = reader.getRoot(); - ToNdMatrix<3, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); @@ -300,7 +293,7 @@ void OverrideDelayModel::write(const std::string& file) const { auto model = builder.initRoot(); auto delays = model.getDelays(); - FromNdMatrix<3, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); // Non-scalar capnproto fields should be first initialized with // init(count), and then accessed from the returned diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index 5f61b856405..05fba845f0a 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -95,7 +95,7 @@ class DeltaDelayModel : public PlaceDelayModel { : cross_layer_delay_(min_cross_layer_delay) , is_flat_(is_flat) {} DeltaDelayModel(float min_cross_layer_delay, - vtr::NdMatrix delta_delays, + vtr::NdMatrix delta_delays, bool is_flat) : delays_(std::move(delta_delays)) , cross_layer_delay_(min_cross_layer_delay) @@ -111,15 +111,12 @@ class DeltaDelayModel : public PlaceDelayModel { void read(const std::string& file) override; void write(const std::string& file) const override; - const vtr::NdMatrix& delays() const { + const vtr::NdMatrix& delays() const { return delays_; } private: - vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] - /** - * @brief The minimum delay of inter-layer connections - */ + vtr::NdMatrix delays_; // [0..num_layers-1 
(from)][0..num_layers-1 (to)][0..max_dx][0..max_dy] float
cross_layer_delay_; /** * @brief Indicates whether the router is a two-stage or run-flat diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp index 8c941bd1d81..6c7e0997c1e 100644 --- a/vpr/src/place/place_timing_update.cpp +++ b/vpr/src/place/place_timing_update.cpp @@ -298,7 +298,7 @@ void update_td_costs(const PlaceDelayModel* delay_model, #ifdef VTR_ASSERT_DEBUG_ENABLED double check_timing_cost = 0.; - comp_td_costs(delay_model, place_crit, &check_timing_cost); + comp_td_costs(delay_model, place_crit, placer_state, &check_timing_cost); VTR_ASSERT_DEBUG_MSG(check_timing_cost == *timing_cost, "Total timing cost calculated incrementally in update_td_costs() is " "not consistent with value calculated from scratch in comp_td_costs()"); diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index c16a0d6dbad..f7c333aa53e 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -70,7 +70,8 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x_loc, int source_y_loc, int sink_x_loc, @@ -89,6 +90,7 @@ typedef std::function&, @@ -98,7 +100,8 @@ typedef std::function>& matrix, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x, int source_y, int start_x, @@ -113,7 +116,8 @@ static void generic_compute_matrix_iterative_astar( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x, int source_y, int start_x, @@ -125,7 +129,7 @@ static void generic_compute_matrix_dijkstra_expansion( const std::set& allowed_types, bool is_flat); -static vtr::NdMatrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, 
const t_placer_opts& palcer_opts, const t_router_opts& router_opts, @@ -135,7 +139,7 @@ static vtr::NdMatrix compute_delta_delays( float delay_reduce(std::vector& delays, e_reducer reducer); -static vtr::NdMatrix compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -160,14 +164,17 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, RRNodeId& out_src_node, RRNodeId& out_sink_node); -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); static int get_longest_segment_length(std::vector& segment_inf); -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); -static float find_neightboring_average(vtr::NdMatrix& matrix, t_physical_tile_loc tile_loc, int max_distance); +static float find_neightboring_average(vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc to_tile_loc, + int max_distance); /******* Globally Accessible Functions **********/ @@ -368,7 +375,8 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x, int source_y, int sink_x, @@ -384,18 +392,18 @@ static float route_connection_delay( bool successfully_routed = false; //Get the rr nodes to route between - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); + auto best_driver_ptcs = 
get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); if (sink_rr_node == RRNodeId::INVALID()) continue; @@ -409,8 +417,7 @@ static float route_connection_delay( successfully_routed = route_profiler.calculate_delay( source_rr_node, sink_rr_node, router_opts, - &net_delay_value, - layer_num); + &net_delay_value); } if (successfully_routed) break; @@ -420,7 +427,7 @@ static float route_connection_delay( if (!successfully_routed) { VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - layer_num, source_x, source_y, layer_num, sink_x, sink_y, net_delay_value); + source_x, source_y, from_layer_num, sink_x, sink_y, to_layer_num, net_delay_value); } return (net_delay_value); @@ -443,7 +450,8 @@ static void add_delay_to_matrix( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& /*route_profiler*/, vtr::Matrix>& matrix, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x, int source_y, int start_x, @@ -456,7 +464,7 @@ static void generic_compute_matrix_dijkstra_expansion( bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); - 
t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { for (int sink_x = start_x; sink_x <= end_x; sink_x++) { @@ -483,10 +491,10 @@ static void generic_compute_matrix_dijkstra_expansion( vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); @@ -501,7 +509,7 @@ static void generic_compute_matrix_dijkstra_expansion( continue; } - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { if (matrix[delta_x][delta_y].empty()) { //Only set empty target if we don't already have a valid delta delay @@ -517,10 +525,10 @@ static void generic_compute_matrix_dijkstra_expansion( } } else { bool found_a_sink = false; - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, 
layer_num})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); if (sink_rr_node == RRNodeId::INVALID()) continue; @@ -568,8 +576,14 @@ static void generic_compute_matrix_dijkstra_expansion( int delta_y = abs(sink_y - source_y); if (!found_matrix[delta_x][delta_y]) { add_delay_to_matrix(&matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); - VTR_LOG_WARN("Unable to route between blocks at (%d,%d) and (%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, sink_x, sink_y, IMPOSSIBLE_DELTA); + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + source_x, + source_y, + from_layer_num, + sink_x, + sink_y, + to_layer_num, + IMPOSSIBLE_DELTA); } } } @@ -578,7 +592,8 @@ static void generic_compute_matrix_dijkstra_expansion( static void generic_compute_matrix_iterative_astar( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, - int layer_num, + int from_layer_num, + int to_layer_num, int source_x, int source_y, int start_x, @@ -601,8 +616,8 @@ static void generic_compute_matrix_iterative_astar( delta_x = abs(sink_x - source_x); delta_y = abs(sink_y - source_y); - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); bool src_or_target_empty = (src_type == 
device_ctx.EMPTY_PHYSICAL_TILE_TYPE || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); @@ -624,7 +639,15 @@ static void generic_compute_matrix_iterative_astar( } else { //Valid start/end - float delay = route_connection_delay(route_profiler, layer_num, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect); + float delay = route_connection_delay(route_profiler, + from_layer_num, + to_layer_num, + source_x, + source_y, + sink_x, + sink_y, + router_opts, + measure_directconnect); #ifdef VERBOSE VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", @@ -645,7 +668,7 @@ static void generic_compute_matrix_iterative_astar( } } -static vtr::NdMatrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -659,195 +682,196 @@ static vtr::NdMatrix compute_delta_delays( auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - vtr::NdMatrix delta_delays({static_cast(grid.get_num_layers()), grid.width(), grid.height()}); + vtr::NdMatrix delta_delays({static_cast(grid.get_num_layers()), static_cast(grid.get_num_layers()), grid.width(), grid.height()}); - for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { - vtr::Matrix> sampled_delta_delays({grid.width(), grid.height()}); + for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); from_layer_num++) { + for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); to_layer_num++) { + vtr::NdMatrix, 2> sampled_delta_delays({grid.width(), grid.height()}); - size_t mid_x = vtr::nint(grid.width() / 2); - size_t mid_y = vtr::nint(grid.height() / 2); + size_t mid_x = vtr::nint(grid.width() / 2); + size_t mid_y = vtr::nint(grid.height() / 2); - size_t low_x = std::min(longest_length, mid_x); - size_t low_y = std::min(longest_length, mid_y); - size_t high_x = mid_x; - size_t high_y = mid_y; - if (longest_length <= grid.width()) { - 
high_x = std::max(grid.width() - longest_length, mid_x); - } - if (longest_length <= grid.height()) { - high_y = std::max(grid.height() - longest_length, mid_y); - } + size_t low_x = std::min(longest_length, mid_x); + size_t low_y = std::min(longest_length, mid_y); + size_t high_x = mid_x; + size_t high_y = mid_y; + if (longest_length <= grid.width()) { + high_x = std::max(grid.width() - longest_length, mid_x); + } + if (longest_length <= grid.height()) { + high_y = std::max(grid.height() - longest_length, mid_y); + } - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - for (const auto& type : allowed_types_vector) { - allowed_types.insert(type); + std::set allowed_types; + if (!placer_opts.allowed_tiles_for_delay_model.empty()) { + auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + for (const auto& type : allowed_types_vector) { + allowed_types.insert(type); + } } - } - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . 
= (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - - //Find the lowest y location on the left edge with a non-empty block - int y = 0; - int x = 0; - t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < (int)grid.width(); ++x) { - for (y = 0; y < (int)grid.height(); ++y) { - auto type = grid.get_physical_type({x, y, layer_num}); + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . = (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + + //Find the lowest y location on the left edge with a non-empty block + int y = 0; + int x = 0; + t_physical_tile_type_ptr src_type = nullptr; + for (x = 0; x < (int)grid.width(); ++x) { + for (y = 0; y < (int)grid.height(); ++y) { + auto type = grid.get_physical_type({x, y, from_layer_num}); - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; + VTR_ASSERT(src_type != nullptr); + + t_compute_delta_delay_matrix generic_compute_matrix; + switch (placer_opts.place_delta_delay_matrix_calculation_method) { + case e_place_delta_delay_algorithm::ASTAR_ROUTE: + generic_compute_matrix = 
generic_compute_matrix_iterative_astar; + break; + case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: + generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; + break; + default: + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); } - } - VTR_ASSERT(src_type != nullptr); - - t_compute_delta_delay_matrix generic_compute_matrix; - switch (placer_opts.place_delta_delay_matrix_calculation_method) { - case e_place_delta_delay_algorithm::ASTAR_ROUTE: - generic_compute_matrix = generic_compute_matrix_iterative_astar; - break; - case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: - generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; - break; - default: - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); - } #ifdef VERBOSE - VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); + VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Find the lowest x location on the bottom edge with a non-empty block - src_type = nullptr; - for (y = 0; y < (int)grid.height(); ++y) { - for (x = 0; x < (int)grid.width(); ++x) { - auto type = grid.get_physical_type({x, y, layer_num}); + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Find the lowest x location on the bottom edge with a non-empty block + src_type = nullptr; + for (y = 0; y < (int)grid.height(); ++y) { + for (x = 0; x < (int)grid.width(); ++x) { + auto type = grid.get_physical_type({x, y, from_layer_num}); - 
if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); + VTR_ASSERT(src_type != nullptr); #ifdef VERBOSE - VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); + VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions B, C, E, F + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions B, C, E, F #ifdef VERBOSE - VTR_LOG("Computing from low/low:\n"); + VTR_LOG("Computing from low/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - low_x, low_y, - low_x, low_y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions D, E, G, H + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, low_y, + low_x, low_y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, 
allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions D, E, G, H #ifdef VERBOSE - VTR_LOG("Computing from high/high:\n"); + VTR_LOG("Computing from high/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - high_x, high_y, - 0, 0, - high_x, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions A, B, D, E + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + high_x, high_y, + 0, 0, + high_x, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions A, B, D, E #ifdef VERBOSE - VTR_LOG("Computing from high/low:\n"); + VTR_LOG("Computing from high/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - high_x, low_y, - 0, low_y, - high_x, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions E, F, H, I + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + high_x, low_y, + 0, low_y, + high_x, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions E, F, H, I #ifdef VERBOSE - VTR_LOG("Computing from low/high:\n"); + VTR_LOG("Computing from low/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - layer_num, - low_x, high_y, - low_x, 0, - grid.width() - 1, high_y, - router_opts, - measure_directconnect, allowed_types, - 
is_flat); - - for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { - for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { - delta_delays[layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, high_y, + low_x, 0, + grid.width() - 1, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { + for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { + delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + } } } } @@ -895,19 +919,20 @@ float delay_reduce(std::vector& delays, e_reducer reducer) { * we return IMPOSSIBLE_DELTA. */ static float find_neightboring_average( - vtr::NdMatrix& matrix, - t_physical_tile_loc tile_loc, + vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc to_tile_loc, int max_distance) { float sum = 0; int counter = 0; - int endx = matrix.end_index(1); - int endy = matrix.end_index(2); + int endx = matrix.end_index(2); + int endy = matrix.end_index(3); int delx, dely; - int x = tile_loc.x; - int y = tile_loc.y; - int layer_num = tile_loc.layer_num; + int x = to_tile_loc.x; + int y = to_tile_loc.y; + int to_layer = to_tile_loc.layer_num; for (int distance = 1; distance <= max_distance; ++distance) { for (delx = x - distance; delx <= x + distance; delx++) { @@ -922,11 +947,11 @@ static float find_neightboring_average( continue; } - if (matrix[layer_num][delx][dely] == EMPTY_DELTA || matrix[layer_num][delx][dely] == IMPOSSIBLE_DELTA) { + if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { continue; } counter++; - sum += matrix[layer_num][delx][dely]; + sum += matrix[from_layer][to_layer][delx][dely]; } } if (counter != 0) { 
@@ -937,7 +962,7 @@ static float find_neightboring_average( return IMPOSSIBLE_DELTA; } -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours // // Empty coordinates may occur if the sampling location happens to not have @@ -945,32 +970,40 @@ static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // would return a result, so we fill in the empty holes with a small // neighbour average. constexpr int kMaxAverageDistance = 2; - for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { - if (delta_delays[layer_num][delta_x][delta_y] == EMPTY_DELTA) { - delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average(delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { + for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) { + delta_delays[from_layer][to_layer][delta_x][delta_y] = + find_neightboring_average(delta_delays, + from_layer, + {delta_x, delta_y, to_layer}, + kMaxAverageDistance); + } } } } } } -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours - for (size_t layer_num = 0; layer_num < delta_delays.dim_size(0); ++layer_num) { - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(1); ++delta_x) { - for (size_t delta_y = 0; 
delta_y < delta_delays.dim_size(2); ++delta_y) { - if (delta_delays[layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { - delta_delays[layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; + for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) { + for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) { + for (size_t delta_y = 0; delta_y < delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { + delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; + } } } } } } -static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { // Set any impossible delta's to the average of it's neighbours // // Impossible coordinates may occur if an IPIN cannot be reached from the @@ -983,19 +1016,21 @@ static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { // filling these gaps. It is more important to have a poor predication, // than a invalid value and causing a slack assertion. 
constexpr int kMaxAverageDistance = 5; - for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { - if (delta_delays[layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { - delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average( - delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { + delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neightboring_average( + delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance); + } } } } } } -static vtr::NdMatrix compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -1003,7 +1038,7 @@ static vtr::NdMatrix compute_delta_delay_model( int longest_length, bool is_flat) { vtr::ScopedStartFinishTimer timer("Computing delta delays"); - vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, + vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, placer_opts, router_opts, measure_directconnect, @@ -1089,7 +1124,7 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, //Check that the from pin exists at this from location //(with multi-width/height blocks pins may not exist at all locations) bool from_pin_found = false; - if (direct->from_side != NUM_SIDES) { + if (direct->from_side != 
NUM_2D_SIDES) { RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); from_pin_found = from_pin_rr.is_valid(); } else { @@ -1105,7 +1140,7 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, //Check that the from pin exists at this from location //(with multi-width/height blocks pins may not exist at all locations) bool to_pin_found = false; - if (direct->to_side != NUM_SIDES) { + if (direct->to_side != NUM_2D_SIDES) { RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); to_pin_found = (to_pin_rr != RRNodeId::INVALID()); } else { @@ -1164,19 +1199,21 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, return true; } -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) { - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - float delta_delay = delta_delays[layer_num][x][y]; + for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { + for (size_t x = 0; x < grid.width(); ++x) { + for (size_t y = 0; y < grid.height(); ++y) { + float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y]; - if (delta_delay < 0.) { - VPR_ERROR(VPR_ERROR_PLACE, - "Found invaild negative delay %g for delta (%d,%d)", - delta_delay, x, y); + if (delta_delay < 0.) 
{ + VPR_ERROR(VPR_ERROR_PLACE, + "Found invaild negative delay %g for delta [%d,%d,%d,%d]", + delta_delay, from_layer_num, to_layer_num, x, y); + } } } } @@ -1246,7 +1283,7 @@ void OverrideDelayModel::compute_override_delay_model( if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; float direct_connect_delay = std::numeric_limits::quiet_NaN(); - bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay, OPEN); + bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); if (found_routing_path) { set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay); diff --git a/vpr/src/route/build_switchblocks.cpp b/vpr/src/route/build_switchblocks.cpp index 3e9f8386312..22714b7b224 100644 --- a/vpr/src/route/build_switchblocks.cpp +++ b/vpr/src/route/build_switchblocks.cpp @@ -201,6 +201,7 @@ static void count_wire_type_sizes(const t_chan_seg_details* channel, int nodes_p static void compute_wire_connections( int x_coord, int y_coord, + int layer_coord, enum e_side from_side, enum e_side to_side, const t_chan_details& chan_details_x, @@ -223,8 +224,10 @@ static void compute_wireconn_connections( Switchblock_Lookup sb_conn, int from_x, int from_y, + int from_layer, int to_x, int to_y, + int to_layer, t_rr_type from_chan_type, t_rr_type to_chan_type, const t_wire_type_sizes* wire_type_sizes_x, @@ -237,7 +240,24 @@ static void compute_wireconn_connections( static int evaluate_num_conns_formula(t_wireconn_scratchpad* scratchpad, std::string num_conns_formula, int from_wire_count, int to_wire_count); -/* returns the wire indices belonging to the types in 'wire_type_vec' and switchpoints in 'points' at the given channel segment */ +/** + * + * @brief calculates the wire indices belonging to the types in types in 'wire_type_sizes' and switchpoints in 'points' at the given channel segment + * + 
* @param grid device grid + * @param chan_details channel segment details (length, start and end points, ...) + * @param chan_type channel type (CHANX/CHANY) + * @param x the wire x-coordinate + * @param y the wire y-coordinate + * @param side switch block side (top/right/bottom/left/above/under) + * @param wire_switchpoints_vec valid switch points at the given channel segment + * @param wire_type_sizes valid wire types + * @param is_dest whether wires are source or destination within a switch block connection + * @param order switchpoint order (fixed, shuffled) specified in the architecture file + * @param rand_state used to randomly shuffle switchpoint if required (shuffled order) + * @param output_wires collected wire indices that match the specified types and switchpoints + * + */ static void get_switchpoint_wires( const DeviceGrid& grid, const t_chan_seg_details* chan_details, @@ -253,10 +273,38 @@ static void get_switchpoint_wires( std::vector* output_wires, std::vector* scratch_wires); -static const t_chan_details& index_into_correct_chan(int tile_x, int tile_y, enum e_side side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, int* chan_x, int* chan_y, t_rr_type* chan_type); +/** + * @brief finds the correct channel (x or y), and the coordinates to index into it based on the + * specified tile coordinates (x,y,layer) and the switch block side. + * + * @param tile_x x-coordinate of the tile + * @param tile_y y-coordinate of the tile + * @param tile_layer layer-coordinate of the tile + * @param src_side switch block source side + * @param dest_side switch block destination side + * @param chan_details_x x-channel segment details (length, start and end points, ...) + * @param chan_details_y y-channel segment details (length, start and end points, ...)
+ * @param chan_x x-coordinate of the channel + * @param chan_y y-coordinate of the channel + * @param chan_layer layer-coordinate of the channel + * @param chan_type type of the channel (CHANX or CHANY) that the function indexes into + * + * @return the channel details (chan_details_x or chan_details_y) of the selected channel; the channel coordinates and type are written to chan_x, chan_y, chan_layer and chan_type + */ +static const t_chan_details& index_into_correct_chan(int tile_x, int tile_y, int tile_layer, enum e_side src_side, enum e_side dest_side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, int& chan_x, int& chan_y, int& chan_layer, t_rr_type& chan_type); -/* checks whether the specified coordinates are out of bounds */ -static bool coords_out_of_bounds(const DeviceGrid& grid, int x_coord, int y_coord, e_rr_type chan_type); +/** + * @brief check whether a specific track location is outside the device grid + * + * @param grid device grid + * @param x_coord x-coordinate of the location + * @param y_coord y-coordinate of the location + * @param layer_coord layer-coordinate of the location + * @param chan_type track channel type (CHANX or CHANY), required since device perimeter does not have certain channels + * + * @return true if the track location is outside the device grid, false otherwise.
+ */ +static bool coords_out_of_bounds(const DeviceGrid& grid, int x_coord, int y_coord, int layer_coord, e_rr_type chan_type); /* returns the subsegment number of the specified wire at seg_coord*/ static int get_wire_subsegment_num(const DeviceGrid& grid, e_rr_type chan_type, const t_chan_seg_details& wire_details, int seg_coord); @@ -267,27 +315,80 @@ int get_wire_segment_length(const DeviceGrid& grid, e_rr_type chan_type, const t * of seg_coord, and connection to the sb_side of the switchblock */ static int get_switchpoint_of_wire(const DeviceGrid& grid, e_rr_type chan_type, const t_chan_seg_details& wire_details, int seg_coord, e_side sb_side); -/* returns true if the coordinates x/y do not correspond to the location specified by 'location' */ -static bool sb_not_here(const DeviceGrid& grid, int x, int y, e_sb_location location); +/** + * @brief check whether a switch block does not exist at a specified coordinate within the device grid + * + * @param grid device grid + * @param inter_cluster_rr used to check whether inter-cluster programmable routing resources exist in the current layer + * @param x x-coordinate of the location + * @param y y-coordinate of the location + * @param layer layer-coordinate of the location + * @param location location of the switch block according to custom switch block description in the architecture file + * + * @return true if no switch block exists at the specified location, false otherwise.
+ */ +static bool sb_not_here(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer, e_sb_location location); + +/** + * @brief check whether specified coordinate is located at the device grid corner and a switch block exists there + * + * @param grid device grid + * @param inter_cluster_rr used to check whether inter-cluster programmable routing resources exist in the current layer + * @param x x-coordinate of the location + * @param y y-coordinate of the location + * @param layer layer-coordinate of the location + * + * @return true if the specified coordinate represents a corner location within the device grid and a switch block exists there, false otherwise. + */ +static bool is_corner_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer); -/* checks if the specified coordinates represent a corner of the FPGA */ -static bool is_corner(const DeviceGrid& grid, int x, int y); +/** + * @brief check whether specified coordinate is located at one of the perimeter device grid locations and a switch block exists there + * + * @param grid device grid + * @param inter_cluster_rr used to check whether inter-cluster programmable routing resources exist in the current layer + * @param x x-coordinate of the location + * @param y y-coordinate of the location + * @param layer layer-coordinate of the location + * + * @return true if the specified coordinate represents a perimeter location within the device grid and a switch block exists there, false otherwise. 
+ */ +static bool is_perimeter_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer); -/* checks if the specified coordinates correspond to one of the perimeter switchblocks */ -static bool is_perimeter(const DeviceGrid& grid, int x, int y); +/** + * @brief check whether specified coordinate is located at core of the device grid (not perimeter) and a switch block exists there + * + * @param grid device grid + * @param inter_cluster_rr used to check whether inter-cluster programmable routing resources exist in the current layer + * @param x x-coordinate of the location + * @param y y-coordinate of the location + * @param layer layer-coordinate of the location + * + * @return true if the specified coordinate represents a core location within the device grid and a switch block exists there, false otherwise. + */ +static bool is_core_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer); -/* checks if the specified coordinates correspond to the core of the FPGA (i.e. not perimeter) */ -static bool is_core(const DeviceGrid& grid, int x, int y); +/** + * @brief check whether specified layer has inter-cluster programmable routing resources or not. + * + * @param grid device grid + * @param inter_cluster_rr inter-cluster programmable routing resources availability within different layers in multi-die FPGAs + * @param layer a valid layer index within the device grid, must be between [0..num_layer-1] + * + * @return true if the specified layer contain inter-cluster programmable routing resources, false otherwise. 
+ */ +static bool is_prog_routing_avail(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int layer); /* adjusts a negative destination wire index calculated from a permutation formula */ static int adjust_formula_result(int dest_wire, int src_W, int dest_W, int connection_ind); /************ Function Definitions ************/ -/* allocate and build the switchblock permutation map */ + t_sb_connection_map* alloc_and_load_switchblock_permutations(const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, const DeviceGrid& grid, - std::vector switchblocks, + const std::vector& inter_cluster_rr, + const std::vector& switchblocks, t_chan_width* nodes_per_chan, e_directionality directionality, vtr::RandState& rand_state) { @@ -324,20 +425,23 @@ t_sb_connection_map* alloc_and_load_switchblock_permutations(const t_chan_detail if (directionality != sb.directionality) { VPR_FATAL_ERROR(VPR_ERROR_ARCH, "alloc_and_load_switchblock_connections: Switchblock %s does not match directionality of architecture\n", sb.name.c_str()); } - /* Iterate over the x,y coordinates spanning the FPGA. 
*/ - for (size_t x_coord = 0; x_coord < grid.width(); x_coord++) { - for (size_t y_coord = 0; y_coord <= grid.height(); y_coord++) { - if (sb_not_here(grid, x_coord, y_coord, sb.location)) { - continue; - } - /* now we iterate over all the potential side1->side2 connections */ - for (e_side from_side : {TOP, RIGHT, BOTTOM, LEFT}) { - for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) { - /* Fill appropriate entry of the sb_conns map with vector specifying the wires - * the current wire will connect to */ - compute_wire_connections(x_coord, y_coord, from_side, to_side, - chan_details_x, chan_details_y, &sb, grid, - &wire_type_sizes_x, &wire_type_sizes_y, directionality, sb_conns, rand_state, &scratchpad); + /* Iterate over the x,y, layer coordinates spanning the FPGA, filling in all the switch blocks that exist */ + for (int layer_coord = 0; layer_coord < grid.get_num_layers(); layer_coord++) { + for (size_t x_coord = 0; x_coord < grid.width(); x_coord++) { + for (size_t y_coord = 0; y_coord <= grid.height(); y_coord++) { + if (sb_not_here(grid, inter_cluster_rr, x_coord, y_coord, layer_coord, sb.location)) { + continue; + } + /* now we iterate over all the potential side1->side2 connections */ + for (e_side from_side : TOTAL_3D_SIDES) { + for (e_side to_side : TOTAL_3D_SIDES) { + /* Fill appropriate entry of the sb_conns map with vector specifying the wires + * the current wire will connect to */ + compute_wire_connections(x_coord, y_coord, layer_coord, from_side, to_side, + chan_details_x, chan_details_y, &sb, grid, + &wire_type_sizes_x, &wire_type_sizes_y, directionality, sb_conns, + rand_state, &scratchpad); + } } } } @@ -361,8 +465,7 @@ void free_switchblock_permutations(t_sb_connection_map* sb_conns) { return; } -/* returns true if the coordinates x/y do not correspond to the location specified by 'location' */ -static bool sb_not_here(const DeviceGrid& grid, int x, int y, e_sb_location location) { +static bool sb_not_here(const DeviceGrid& grid, const 
std::vector& inter_cluster_rr, int x, int y, int layer, e_sb_location location) { bool sb_not_here = true; switch (location) { @@ -370,22 +473,22 @@ static bool sb_not_here(const DeviceGrid& grid, int x, int y, e_sb_location loca sb_not_here = false; break; case E_PERIMETER: - if (is_perimeter(grid, x, y)) { + if (is_perimeter_sb(grid, inter_cluster_rr, x, y, layer)) { sb_not_here = false; } break; case E_CORNER: - if (is_corner(grid, x, y)) { + if (is_corner_sb(grid, inter_cluster_rr, x, y, layer)) { sb_not_here = false; } break; case E_CORE: - if (is_core(grid, x, y)) { + if (is_core_sb(grid, inter_cluster_rr, x, y, layer)) { sb_not_here = false; } break; case E_FRINGE: - if (is_perimeter(grid, x, y) && !is_corner(grid, x, y)) { + if (is_perimeter_sb(grid, inter_cluster_rr, x, y, layer) && !is_corner_sb(grid, inter_cluster_rr, x, y, layer)) { sb_not_here = false; } break; @@ -396,8 +499,10 @@ static bool sb_not_here(const DeviceGrid& grid, int x, int y, e_sb_location loca return sb_not_here; } -/* checks if the specified coordinates represent a corner of the FPGA */ -static bool is_corner(const DeviceGrid& grid, int x, int y) { +static bool is_corner_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer) { + if (!is_prog_routing_avail(grid, inter_cluster_rr, layer)) { + return false; + } bool is_corner = false; if ((x == 0 && y == 0) || (x == 0 && y == int(grid.height()) - 2) || //-2 for no perim channels (x == int(grid.width()) - 2 && y == 0) || //-2 for no perim channels @@ -407,8 +512,10 @@ static bool is_corner(const DeviceGrid& grid, int x, int y) { return is_corner; } -/* checks if the specified coordinates correspond to one of the perimeter switchblocks */ -static bool is_perimeter(const DeviceGrid& grid, int x, int y) { +static bool is_perimeter_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer) { + if (!is_prog_routing_avail(grid, inter_cluster_rr, layer)) { + return false; + } bool 
is_perimeter = false; if (x == 0 || x == int(grid.width()) - 2 || y == 0 || y == int(grid.height()) - 2) { is_perimeter = true; @@ -416,13 +523,25 @@ static bool is_perimeter(const DeviceGrid& grid, int x, int y) { return is_perimeter; } -/* checks if the specified coordinates correspond to the core of the FPGA (i.e. not perimeter) */ -static bool is_core(const DeviceGrid& grid, int x, int y) { - bool is_core = !is_perimeter(grid, x, y); +static bool is_core_sb(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int x, int y, int layer) { + if (!is_prog_routing_avail(grid, inter_cluster_rr, layer)) { + return false; + } + bool is_core = !is_perimeter_sb(grid, inter_cluster_rr, x, y, layer); return is_core; } -/* Counts the number of wires in each wire type in the specified channel */ +static bool is_prog_routing_avail(const DeviceGrid& grid, const std::vector& inter_cluster_rr, int layer) { + bool is_prog_avail = true; + //make sure layer number is legal + VTR_ASSERT(layer >= 0 && layer < grid.get_num_layers()); + //check if the current layer has programmable routing resources before trying to build a custom switch blocks + if (!inter_cluster_rr.at(layer)) { + is_prog_avail = false; + } + return is_prog_avail; +} + static void count_wire_type_sizes(const t_chan_seg_details* channel, int nodes_per_chan, t_wire_type_sizes* wire_type_sizes) { vtr::string_view wire_type; vtr::string_view new_type; @@ -454,7 +573,6 @@ static void count_wire_type_sizes(const t_chan_seg_details* channel, int nodes_p return; } -/* returns the wire indices belonging to the types in 'wire_type_vec' and switchpoints in 'points' at the given channel segment */ static void get_switchpoint_wires( const DeviceGrid& grid, const t_chan_seg_details* chan_details, @@ -551,16 +669,14 @@ static void get_switchpoint_wires( } } -/* Compute the wire(s) that the wire at (x, y, from_side, to_side) should connect to. 
- * sb_conns is updated with the result */ -static void compute_wire_connections(int x_coord, int y_coord, enum e_side from_side, enum e_side to_side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, t_switchblock_inf* sb, const DeviceGrid& grid, const t_wire_type_sizes* wire_type_sizes_x, const t_wire_type_sizes* wire_type_sizes_y, e_directionality directionality, t_sb_connection_map* sb_conns, vtr::RandState& rand_state, t_wireconn_scratchpad* scratchpad) { - int from_x, from_y; /* index into source channel */ - int to_x, to_y; /* index into destination channel */ +static void compute_wire_connections(int x_coord, int y_coord, int layer_coord, enum e_side from_side, enum e_side to_side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, t_switchblock_inf* sb, const DeviceGrid& grid, const t_wire_type_sizes* wire_type_sizes_x, const t_wire_type_sizes* wire_type_sizes_y, e_directionality directionality, t_sb_connection_map* sb_conns, vtr::RandState& rand_state, t_wireconn_scratchpad* scratchpad) { + int from_x, from_y, from_layer; /* index into source channel */ + int to_x, to_y, to_layer; /* index into destination channel */ t_rr_type from_chan_type, to_chan_type; /* the type of channel - i.e. 
CHANX or CHANY */ - from_x = from_y = to_x = to_y = UNDEFINED; + from_x = from_y = to_x = to_y = from_layer = to_layer = UNDEFINED; - SB_Side_Connection side_conn(from_side, to_side); /* for indexing into this switchblock's permutation funcs */ - Switchblock_Lookup sb_conn(x_coord, y_coord, from_side, to_side); /* for indexing into FPGA's switchblock map */ + SB_Side_Connection side_conn(from_side, to_side); /* for indexing into this switchblock's permutation funcs */ + Switchblock_Lookup sb_conn(x_coord, y_coord, layer_coord, from_side, to_side); /* for indexing into FPGA's switchblock map */ /* can't connect a switchblock side to itself */ if (from_side == to_side) { @@ -573,18 +689,18 @@ static void compute_wire_connections(int x_coord, int y_coord, enum e_side from_ } /* find the correct channel, and the coordinates to index into it for both the source and - * destination channels. also return the channel type (ie chanx/chany) into which we are + * destination channels. also return the channel type (ie chanx/chany/both) into which we are * indexing */ /* details for source channel */ - const t_chan_details& from_chan_details = index_into_correct_chan(x_coord, y_coord, from_side, chan_details_x, chan_details_y, - &from_x, &from_y, &from_chan_type); + const t_chan_details& from_chan_details = index_into_correct_chan(x_coord, y_coord, layer_coord, from_side, to_side, chan_details_x, chan_details_y, + from_x, from_y, from_layer, from_chan_type); /* details for destination channel */ - const t_chan_details& to_chan_details = index_into_correct_chan(x_coord, y_coord, to_side, chan_details_x, chan_details_y, - &to_x, &to_y, &to_chan_type); + const t_chan_details& to_chan_details = index_into_correct_chan(x_coord, y_coord, layer_coord, to_side, from_side, chan_details_x, chan_details_y, + to_x, to_y, to_layer, to_chan_type); /* make sure from_x/y and to_x/y aren't out of bounds */ - if (coords_out_of_bounds(grid, to_x, to_y, to_chan_type) || coords_out_of_bounds(grid, 
from_x, from_y, from_chan_type)) { + if (coords_out_of_bounds(grid, to_x, to_y, to_layer, to_chan_type) || coords_out_of_bounds(grid, from_x, from_y, from_layer, from_chan_type)) { return; } @@ -596,6 +712,7 @@ static void compute_wire_connections(int x_coord, int y_coord, enum e_side from_ if (to_chan_type == CHANY) { wire_type_sizes_to = wire_type_sizes_y; } + /* iterate over all the wire connections specified for this switch block */ for (int iconn = 0; iconn < (int)sb->wireconns.size(); iconn++) { /* pointer to a connection specification between wire types/subsegment_nums */ @@ -604,7 +721,7 @@ static void compute_wire_connections(int x_coord, int y_coord, enum e_side from_ /* compute the destination wire segments to which the source wire segment should connect based on the * current wireconn */ compute_wireconn_connections(grid, directionality, from_chan_details, to_chan_details, - sb_conn, from_x, from_y, to_x, to_y, from_chan_type, to_chan_type, wire_type_sizes_from, + sb_conn, from_x, from_y, from_layer, to_x, to_y, to_layer, from_chan_type, to_chan_type, wire_type_sizes_from, wire_type_sizes_to, sb, wireconn_ptr, sb_conns, rand_state, scratchpad); } @@ -623,8 +740,10 @@ static void compute_wireconn_connections( Switchblock_Lookup sb_conn, int from_x, int from_y, + int from_layer, int to_x, int to_y, + int to_layer, t_rr_type from_chan_type, t_rr_type to_chan_type, const t_wire_type_sizes* wire_type_sizes_from, @@ -636,25 +755,30 @@ static void compute_wireconn_connections( t_wireconn_scratchpad* scratchpad) { constexpr bool verbose = false; - /* vectors that will contain indices of the wires belonging to the source/dest wire types/points */ + //choose the from_side to be the same as to_side if the connection is travelling across dice in multi-die FPGAs + auto from_side = (sb_conn.from_side != ABOVE && sb_conn.from_side != UNDER) ? 
sb_conn.from_side : sb_conn.to_side; + //choose the to_side to be the same as from_side if the connection is travelling across dice in multi-die FPGAs + auto to_side = (sb_conn.to_side != ABOVE && sb_conn.to_side != UNDER) ? sb_conn.to_side : sb_conn.from_side; - get_switchpoint_wires(grid, - from_chan_details[from_x][from_y].data(), from_chan_type, from_x, from_y, sb_conn.from_side, + /* vectors that will contain indices of the wires belonging to the source/dest wire types/points */ + get_switchpoint_wires(grid, from_chan_details[from_x][from_y].data(), from_chan_type, from_x, from_y, from_side, wireconn_ptr->from_switchpoint_set, wire_type_sizes_from, false, wireconn_ptr->from_switchpoint_order, rand_state, &scratchpad->potential_src_wires, &scratchpad->scratch_wires); - get_switchpoint_wires(grid, - to_chan_details[to_x][to_y].data(), to_chan_type, to_x, to_y, sb_conn.to_side, - wireconn_ptr->to_switchpoint_set, wire_type_sizes_to, true, wireconn_ptr->to_switchpoint_order, rand_state, &scratchpad->potential_dest_wires, + + get_switchpoint_wires(grid, to_chan_details[to_x][to_y].data(), to_chan_type, to_x, to_y, to_side, + wireconn_ptr->to_switchpoint_set, wire_type_sizes_to, true, + wireconn_ptr->to_switchpoint_order, rand_state, &scratchpad->potential_dest_wires, &scratchpad->scratch_wires); const auto& potential_src_wires = scratchpad->potential_src_wires; const auto& potential_dest_wires = scratchpad->potential_dest_wires; - VTR_LOGV(verbose, "SB_LOC: %d,%d %s->%s\n", sb_conn.x_coord, sb_conn.y_coord, SIDE_STRING[sb_conn.from_side], SIDE_STRING[sb_conn.to_side]); +#ifdef VERBOSE_RR + VTR_LOGV(verbose, "SB_LOC: %d,%d %s->%s\n", sb_conn.x_coord, sb_conn.y_coord, TOTAL_2D_SIDE_STRINGS[sb_conn.from_side], TOTAL_2D_SIDE_STRINGS[sb_conn.to_side]); //Define to print out specific wire-switchpoints used in to/from sets, if verbose is set true -#if 0 + for (auto from_set : wireconn_ptr->from_switchpoint_set) { VTR_LOGV(verbose, " FROM_SET: %s @", 
from_set.segment_name.c_str()); for (int switchpoint : from_set.switchpoints) { @@ -719,13 +843,13 @@ static void compute_wireconn_connections( if (sb_conn.from_side == TOP || sb_conn.from_side == RIGHT) { continue; } - VTR_ASSERT(sb_conn.from_side == BOTTOM || sb_conn.from_side == LEFT); + VTR_ASSERT(sb_conn.from_side == BOTTOM || sb_conn.from_side == LEFT || sb_conn.from_side == ABOVE || sb_conn.from_side == UNDER); } else if (from_wire_direction == Direction::DEC) { /* a wire heading in the decreasing direction can only connect from the TOP or RIGHT sides of a switch block */ if (sb_conn.from_side == BOTTOM || sb_conn.from_side == LEFT) { continue; } - VTR_ASSERT(sb_conn.from_side == TOP || sb_conn.from_side == RIGHT); + VTR_ASSERT(sb_conn.from_side == TOP || sb_conn.from_side == RIGHT || sb_conn.from_side == ABOVE || sb_conn.from_side == UNDER); } else { VTR_ASSERT(from_wire_direction == Direction::BIDIR); } @@ -757,12 +881,19 @@ static void compute_wireconn_connections( t_switchblock_edge sb_edge; sb_edge.from_wire = from_wire; sb_edge.to_wire = to_wire; + sb_edge.from_wire_layer = from_layer; + sb_edge.to_wire_layer = to_layer; - // if the switch override has been set, use that. 
Otherwise use default + // if the switch override has been set, use that, Otherwise use default if (wireconn_ptr->switch_override_indx != DEFAULT_SWITCH) { sb_edge.switch_ind = wireconn_ptr->switch_override_indx; + } else if (from_layer == to_layer) { + sb_edge.switch_ind = to_chan_details[to_x][to_y][to_wire].arch_wire_switch(); + sb_edge.switch_ind_between_layers = -1; //the connection does not cross any layers } else { + VTR_ASSERT(from_layer != to_layer); sb_edge.switch_ind = to_chan_details[to_x][to_y][to_wire].arch_wire_switch(); + sb_edge.switch_ind_between_layers = to_chan_details[to_x][to_y][to_wire].arch_opin_between_dice_switch(); } VTR_LOGV(verbose, " make_conn: %d -> %d switch=%d\n", sb_edge.from_wire, sb_edge.to_wire, sb_edge.switch_ind); @@ -775,9 +906,9 @@ static void compute_wireconn_connections( std::swap(sb_reverse_edge.from_wire, sb_reverse_edge.to_wire); //Since we are implementing the reverse connection we have swapped from and to. // - //Coverity flags this (false positive), so annotatate so coverity ignores it: + //Coverity flags this (false positive), so annotate coverity ignores it: // coverity[swapped_arguments : Intentional] - Switchblock_Lookup sb_conn_reverse(sb_conn.x_coord, sb_conn.y_coord, sb_conn.to_side, sb_conn.from_side); + Switchblock_Lookup sb_conn_reverse(sb_conn.x_coord, sb_conn.y_coord, sb_conn.layer_coord, sb_conn.to_side, sb_conn.from_side); (*sb_conns)[sb_conn_reverse].push_back(sb_reverse_edge); } } @@ -794,64 +925,85 @@ static int evaluate_num_conns_formula(t_wireconn_scratchpad* scratchpad, std::st return scratchpad->formula_parser.parse_formula(num_conns_formula, vars); } -/* Here we find the correct channel (x or y), and the coordinates to index into it based on the - * specified tile coordinates and the switchblock side. 
Also returns the type of channel - * that we are indexing into (ie, CHANX or CHANY */ -static const t_chan_details& index_into_correct_chan(int tile_x, int tile_y, enum e_side side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, int* set_x, int* set_y, t_rr_type* chan_type) { - *chan_type = CHANX; - +static const t_chan_details& index_into_correct_chan(int tile_x, int tile_y, int tile_layer, enum e_side src_side, enum e_side dest_side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, int& chan_x, int& chan_y, int& chan_layer, t_rr_type& chan_type) { + chan_type = CHANX; /* here we use the VPR convention that a tile 'owns' the channels directly to the right * and above it */ - switch (side) { + switch (src_side) { case TOP: - /* this is y-channel belonging to tile above */ - *set_x = tile_x; - *set_y = tile_y + 1; - *chan_type = CHANY; + /* this is y-channel belonging to tile above in the same layer */ + chan_x = tile_x; + chan_y = tile_y + 1; + chan_layer = tile_layer; + chan_type = CHANY; return chan_details_y; break; case RIGHT: - /* this is x-channel belonging to tile to the right */ - *set_x = tile_x + 1; - *set_y = tile_y; - *chan_type = CHANX; + /* this is x-channel belonging to tile to the right in the same layer */ + chan_x = tile_x + 1; + chan_y = tile_y; + chan_layer = tile_layer; + chan_type = CHANX; return chan_details_x; break; case BOTTOM: - /* this is y-channel on the right of the tile */ - *set_x = tile_x; - *set_y = tile_y; - *chan_type = CHANY; + /* this is y-channel on the right of the tile in the same layer */ + chan_x = tile_x; + chan_y = tile_y; + chan_type = CHANY; + chan_layer = tile_layer; return chan_details_y; break; case LEFT: - /* this is x-channel on top of the tile */ - *set_x = tile_x; - *set_y = tile_y; - *chan_type = CHANX; + /* this is x-channel on top of the tile in the same layer*/ + chan_x = tile_x; + chan_y = tile_y; + chan_type = CHANX; + chan_layer = tile_layer; return 
chan_details_x; break; + case ABOVE: + /* this is x-channel and y-channel on the same tile location in layer above the current layer */ + chan_x = tile_x; + chan_y = tile_y; + chan_layer = tile_layer + 1; + chan_type = (dest_side == RIGHT || dest_side == LEFT) ? CHANX : CHANY; + return (dest_side == RIGHT || dest_side == LEFT) ? chan_details_x : chan_details_y; + break; + case UNDER: + /* this is x-channel and y-channel on the same tile location in layer under the current layer */ + chan_x = tile_x; + chan_y = tile_y; + chan_layer = tile_layer - 1; + chan_type = (dest_side == RIGHT || dest_side == LEFT) ? CHANX : CHANY; + return (dest_side == RIGHT || dest_side == LEFT) ? chan_details_x : chan_details_y; + break; default: - VPR_FATAL_ERROR(VPR_ERROR_ARCH, "index_into_correct_chan: unknown side specified: %d\n", side); + VPR_FATAL_ERROR(VPR_ERROR_ARCH, "index_into_correct_chan: unknown side specified: %d\n", src_side); break; } VTR_ASSERT(false); return chan_details_x; //Unreachable } -/* checks whether the specified coordinates are out of bounds */ -static bool coords_out_of_bounds(const DeviceGrid& grid, int x_coord, int y_coord, e_rr_type chan_type) { +static bool coords_out_of_bounds(const DeviceGrid& grid, int x_coord, int y_coord, int layer_coord, e_rr_type chan_type) { bool result = true; + /* the layer the channel is located at must be a legal layer index, i.e. in [0, num_layers - 1], regardless of chan_type (ABOVE/UNDER sides can step one layer past either end) */ + if (layer_coord < 0 || layer_coord >= grid.get_num_layers()) { + return result; + } + if (CHANX == chan_type) { - if (x_coord <= 0 || x_coord >= int(grid.width()) - 1 || /* there is no x-channel at x=0 */ - y_coord < 0 || y_coord >= int(grid.height()) - 1) { + /* there is no x-channel at x=0 */ + if (x_coord <= 0 || x_coord >= int(grid.width()) - 1 || y_coord < 0 || y_coord >= int(grid.height()) - 1) { result = true; } else { result = false; } } else if (CHANY == chan_type) { - if (x_coord < 0 || x_coord >= int(grid.width()) - 1 || y_coord <= 0 || y_coord >= int(grid.height()) - 1) { /* there
is no y-channel at y=0 */ + /* there is no y-channel at y=0 */ + if (x_coord < 0 || x_coord >= int(grid.width()) - 1 || y_coord <= 0 || y_coord >= int(grid.height()) - 1) { result = true; } else { result = false; @@ -863,7 +1015,6 @@ static bool coords_out_of_bounds(const DeviceGrid& grid, int x_coord, int y_coor return result; } -/* returns the subsegment number of the specified wire at seg_coord */ static int get_wire_subsegment_num(const DeviceGrid& grid, e_rr_type chan_type, const t_chan_seg_details& wire_details, int seg_coord) { /* We get wire subsegment number by comparing the wire's seg_coord to the seg_start of the wire. * The offset between seg_start (or seg_end) and seg_coord is the subsegment number @@ -927,8 +1078,6 @@ int get_wire_segment_length(const DeviceGrid& grid, e_rr_type chan_type, const t return wire_length; } -/* Returns the switchpoint of the wire specified by wire_details at a segment coordinate - * of seg_coord, and connection to the sb_side of the switchblock */ static int get_switchpoint_of_wire(const DeviceGrid& grid, e_rr_type chan_type, const t_chan_seg_details& wire_details, int seg_coord, e_side sb_side) { /* this function calculates the switchpoint of a given wire by first calculating * the subsegmennt number of the specified wire. For instance, for a wire with L=4: diff --git a/vpr/src/route/build_switchblocks.h b/vpr/src/route/build_switchblocks.h index 68e60da3824..c0db665b956 100644 --- a/vpr/src/route/build_switchblocks.h +++ b/vpr/src/route/build_switchblocks.h @@ -19,23 +19,30 @@ class Switchblock_Lookup { public: int x_coord; /* x coordinate of switchblock connection */ //TODO: redundant comment?? 
add range int y_coord; /* y coordinate of switchblock connection */ + int layer_coord; /* layer number of switchblock */ e_side from_side; /* source side of switchblock connection */ e_side to_side; /* destination side of switchblock connection */ /* Empty constructor initializes everything to 0 */ Switchblock_Lookup() { - x_coord = y_coord = -1; //TODO: use set function + x_coord = y_coord = layer_coord = -1; //TODO: use set function } /* Constructor for initializing member variables */ + Switchblock_Lookup(int set_x, int set_y, int set_layer, e_side set_from, e_side set_to) { + this->set_coords(set_x, set_y, set_layer, set_from, set_to); //TODO: use set function + } + + /* Constructor for initializing member variables with default layer number (0), used for single die FPGA */ Switchblock_Lookup(int set_x, int set_y, e_side set_from, e_side set_to) { - this->set_coords(set_x, set_y, set_from, set_to); //TODO: use set function + this->set_coords(set_x, set_y, 0, set_from, set_to); } /* Function for setting the segment coordinates */ - void set_coords(int set_x, int set_y, e_side set_from, e_side set_to) { + void set_coords(int set_x, int set_y, int set_layer, e_side set_from, e_side set_to) { x_coord = set_x; y_coord = set_y; + layer_coord = set_layer; from_side = set_from; to_side = set_to; } @@ -44,7 +51,8 @@ class Switchblock_Lookup { bool operator==(const Switchblock_Lookup& obj) const { bool result; if (x_coord == obj.x_coord && y_coord == obj.y_coord - && from_side == obj.from_side && to_side == obj.to_side) { + && from_side == obj.from_side && to_side == obj.to_side + && layer_coord == obj.layer_coord) { result = true; } else { result = false; @@ -55,22 +63,33 @@ class Switchblock_Lookup { struct t_hash_Switchblock_Lookup { size_t operator()(const Switchblock_Lookup& obj) const noexcept { - //TODO: use vtr::hash_combine - size_t result; - result = ((((std::hash()(obj.x_coord) - ^ std::hash()(obj.y_coord) << 10) - ^ std::hash()((int)obj.from_side) << 20) - ^ 
std::hash()((int)obj.to_side) << 30)); - return result; + std::size_t hash = std::hash{}(obj.x_coord); + vtr::hash_combine(hash, obj.y_coord); + vtr::hash_combine(hash, obj.layer_coord); + vtr::hash_combine(hash, obj.from_side); + vtr::hash_combine(hash, obj.to_side); + return hash; } }; -/* contains the index of the destination wire segment within a channel - * and the index of the switch used to connect to it */ +/** + * @brief contains the required information to build an RR graph edge for a switch block connection + * + * @from_wire source wire ptc_num index in a channel + * @to_wire destination wire ptc_num index in a channel + * @switch_ind RR graph switch index that connects the source wire to the destination wire that connect two tracks in same layer + * @switch_ind_between_layers RR graph switch index that connects two tracks in different layers + * @from_wire_layer the layer index that the source wire is located at + * @to_wire_layer the layer index that the destination wire is located at + * + */ struct t_switchblock_edge { short from_wire; short to_wire; short switch_ind; + short switch_ind_between_layers; + short from_wire_layer; + short to_wire_layer; }; /* Switchblock connections are made as [x][y][from_side][to_side][from_wire_ind]. @@ -83,10 +102,27 @@ typedef std::unordered_map, /************ Functions ************/ -/* allocate and build switch block permutation map */ -t_sb_connection_map* alloc_and_load_switchblock_permutations(const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, const DeviceGrid& grid, std::vector switchblocks, t_chan_width* nodes_per_chan, enum e_directionality directionality, vtr::RandState& rand_state); +/** + * @brief allocates and builds switch block permutation map + * + * @param chan_details_x channel-x details (length, start and end points, ...) + * @param chan_details_y channel-y details (length, start and end points, ...) 
+ * @param grid device grid + * @param inter_cluster_rr used to check if a certain layer contain inter-cluster programmable routing resources (wires and switch blocks) + * @param switchblocks switch block information extracted from the architecture file + * @param nodes_per_chan number of track in each channel (x,y) + * @param directionality specifies the switch block edges direction (unidirectional or bidirectional) + * @param rand_state initialize the random number generator (RNG) + * + * @return creates a map between switch blocks (key) and their corresponding edges (value). + */ +t_sb_connection_map* alloc_and_load_switchblock_permutations(const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, const DeviceGrid& grid, const std::vector& inter_cluster_rr, const std::vector& switchblocks, t_chan_width* nodes_per_chan, enum e_directionality directionality, vtr::RandState& rand_state); -/* deallocates switch block connections sparse array */ +/** + * @brief deallocates switch block connections sparse array + * + * @param sb_conns switch block permutation map + */ void free_switchblock_permutations(t_sb_connection_map* sb_conns); #endif diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index d7c41cae408..d7b0f831e44 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -283,7 +283,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra continue; } - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { //Don't connect pins which are not adjacent to channels around the perimeter if ((x == 0 && side != RIGHT) || (x == (int)grid.width() - 1 && side != LEFT) || (y == 0 && side != TOP) || (y == (int)grid.height() - 1 && side != BOTTOM)) { continue; diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 4074d6d283f..5409d5ec49f 100644 --- 
a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -147,7 +147,7 @@ std::tuple ConnectionRouter::timing_driven_route_conne if (cheapest == nullptr) { //Found no path, that may be due to an unlucky choice of existing route tree sub-set, //try again with the full route tree to be sure this is not an artifact of high-fanout routing - VTR_LOG_WARN("No routing path found in high-fanout mode for net connection (to sink_rr %d), retrying with full route tree\n", sink_node); + VTR_LOG_WARN("No routing path found in high-fanout mode for net %zu connection (to sink_rr %d), retrying with full route tree\n", size_t(conn_params.net_id_), sink_node); //Reset any previously recorded node costs so timing_driven_route_connection() //starts over from scratch. @@ -379,10 +379,11 @@ void ConnectionRouter::timing_driven_expand_cheapest(t_heap* cheapest, VTR_LOGV_DEBUG(router_debug_, " Better cost to %d\n", inode); VTR_LOGV_DEBUG(router_debug_, " New total cost: %g\n", new_total_cost); VTR_LOGV_DEBUG(router_debug_, " New back cost: %g\n", new_back_cost); - VTR_LOGV_DEBUG(router_debug_ && (rr_nodes_.node_type(RRNodeId(cheapest->index)) != t_rr_type::SOURCE), " Setting path costs for associated node %d (from %d edge %zu)\n", - cheapest->index, - static_cast(rr_graph_->edge_src_node(cheapest->prev_edge())), - static_cast(cheapest->prev_edge())); + VTR_LOGV_DEBUG(router_debug_ && (cheapest->prev_edge() != RREdgeId::INVALID()), + " Setting path costs for associated node %d (from %d edge %zu)\n", + cheapest->index, + static_cast(rr_graph_->edge_src_node(cheapest->prev_edge())), + static_cast(cheapest->prev_edge())); update_cheapest(cheapest, route_inf); @@ -977,6 +978,8 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( int target_bin_x = grid_to_bin_x(rr_graph_->node_xlow(target_node), spatial_rt_lookup); int target_bin_y = grid_to_bin_y(rr_graph_->node_ylow(target_node), spatial_rt_lookup); + auto target_layer = rr_graph_->node_layer(target_node); + 
int chan_nodes_added = 0; t_bb highfanout_bb; @@ -984,12 +987,13 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( highfanout_bb.xmax = rr_graph_->node_xhigh(target_node); highfanout_bb.ymin = rr_graph_->node_ylow(target_node); highfanout_bb.ymax = rr_graph_->node_yhigh(target_node); - highfanout_bb.layer_min = rr_graph_->node_layer(target_node); - highfanout_bb.layer_max = rr_graph_->node_layer(target_node); + highfanout_bb.layer_min = target_layer; + highfanout_bb.layer_max = target_layer; //Add existing routing starting from the target bin. //If the target's bin has insufficient existing routing add from the surrounding bins bool done = false; + bool found_node_on_same_layer = false; for (int dx : {0, -1, +1}) { size_t bin_x = target_bin_x + dx; @@ -1015,6 +1019,10 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( if (!inside_bb(rr_node_to_add, net_bounding_box)) continue; + auto rt_node_layer_num = rr_graph_->node_layer(rr_node_to_add); + if (rt_node_layer_num == target_layer) + found_node_on_same_layer = true; + // Put the node onto the heap add_route_tree_node_to_heap(rt_node, target_node, cost_params, net_bounding_box); @@ -1027,7 +1035,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( } constexpr int SINGLE_BIN_MIN_NODES = 2; - if (dx == 0 && dy == 0 && chan_nodes_added > SINGLE_BIN_MIN_NODES) { + if (dx == 0 && dy == 0 && chan_nodes_added > SINGLE_BIN_MIN_NODES && found_node_on_same_layer) { //Target bin contained at least minimum amount of routing // //We require at least SINGLE_BIN_MIN_NODES to be added. 
@@ -1041,7 +1049,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( if (done) break; } - if (chan_nodes_added == 0) { //If the target bin, and it's surrounding bins were empty, just add the full route tree + if (chan_nodes_added == 0 || !found_node_on_same_layer) { //If the target bin, and it's surrounding bins were empty, just add the full route tree add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box); return net_bounding_box; } else { diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index b72b78cdaf1..480919ae9b6 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -7,7 +7,6 @@ #include "route_common.h" #include "route_export.h" #include "rr_graph.h" -#include "re_cluster_util.h" /* The numbering relation between the channels and clbs is: * * * diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index 256c4fcb933..e37744ab70a 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -51,8 +51,7 @@ RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list, bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, - float* net_delay, - int layer_num) { + float* net_delay) { /* Returns true as long as found some way to hook up this net, even if that * * way resulted in overuse of resources (congestion). If there is no way * * to route this net, even ignoring congestion, it returns false. 
In this * @@ -83,14 +82,8 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, bounding_box.xmax = device_ctx.grid.width() + 1; bounding_box.ymin = 0; bounding_box.ymax = device_ctx.grid.height() + 1; - // If layer num is not specified, it means the BB should cover all layers - if (layer_num == OPEN) { - bounding_box.layer_min = 0; - bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1; - } else { - bounding_box.layer_min = layer_num; - bounding_box.layer_max = layer_num; - } + bounding_box.layer_min = 0; + bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1; t_conn_cost_params cost_params; cost_params.criticality = 1.; diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h index 71753a4cb91..bda721e1a24 100644 --- a/vpr/src/route/router_delay_profiling.h +++ b/vpr/src/route/router_delay_profiling.h @@ -29,8 +29,7 @@ class RouterDelayProfiler { bool calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, - float* net_delay, - int layer_num); + float* net_delay); /** * @param physical_tile_type_idx diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index c48ee90e073..deb48779d8a 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -1475,12 +1475,11 @@ static std::pair get_adjusted_rr_pin_position(const
RRNodeId rr) { * However, current test show that the simple strategy provides * a good trade-off between runtime and quality of results */ - auto it = std::find_if(SIDES.begin(), SIDES.end(), [&](const e_side candidate_side) { + auto it = std::find_if(TOTAL_2D_SIDES.begin(), TOTAL_2D_SIDES.end(), [&](const e_side candidate_side) { return rr_graph.is_node_on_specific_side(rr, candidate_side); }); - - e_side rr_side = (it != SIDES.end()) ? *it : NUM_SIDES; - VTR_ASSERT_SAFE(NUM_SIDES != rr_side); + e_side rr_side = (it != TOTAL_2D_SIDES.end()) ? *it : NUM_2D_SIDES; + VTR_ASSERT_SAFE(NUM_2D_SIDES != rr_side); if (rr_side == LEFT) { x -= 1; diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index a5129dc5cf5..54b9f1ab11c 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -199,6 +199,7 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder const t_chan_width& chan_width, const int wire_to_ipin_switch, const int wire_to_pin_between_dice_switch, + const int custom_3d_sb_fanin_fanout, const int delayless_switch, const enum e_directionality directionality, bool* Fc_clipped, @@ -479,12 +480,13 @@ static std::unordered_set get_chain_pins(std::vector chai static void build_rr_chan(RRGraphBuilder& rr_graph_builder, const int layer, - const int i, - const int j, + const int x_coord, + const int y_coord, const t_rr_type chan_type, const t_track_to_pin_lookup& track_to_pin_lookup, t_sb_connection_map* sb_conn_map, const vtr::NdMatrix, 3>& switch_block_conn, + vtr::NdMatrix& num_of_3d_conns_custom_SB, const int cost_index_offset, const t_chan_width& nodes_per_chan, const DeviceGrid& grid, @@ -493,11 +495,32 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, const int Fs_per_side, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, - t_rr_edge_info_set& created_rr_edges, + t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, const int wire_to_ipin_switch, const 
int wire_to_pin_between_dice_switch, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, const enum e_directionality directionality); +/** + * @brief builds the extra length-0 CHANX nodes to handle 3D custom switchblocks edges in the RR graph. + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * @param layer switch block layer-coordinate + * @param x_coord switch block x_coordinate + * @param y_coord switch block y-coordinate + * @param const_index_offset index to the correct node type for RR node cost initialization + * @param nodes_per_chan number of tracks per channel (x, y) + * @param chan_details_x channel-x details (length, start and end points, ...) + */ +static void build_inter_die_custom_sb_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, + const int x_coord, + const int y_coord, + const int const_index_offset, + const t_chan_width& nodes_per_chan, + const t_chan_details& chan_details_x); + void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create); void alloc_and_load_edges(RRGraphBuilder& rr_graph_builder, @@ -649,6 +672,7 @@ static void build_rr_graph(const t_graph_type graph_type, const int global_route_switch, const int wire_to_arch_ipin_switch, const int wire_to_pin_between_dice_switch, + const int custom_3d_sb_fanin_fanout, const int delayless_switch, const float R_minW_nmos, const float R_minW_pmos, @@ -752,6 +776,7 @@ void create_rr_graph(const t_graph_type graph_type, det_routing_arch->global_route_switch, det_routing_arch->wire_to_arch_ipin_switch, det_routing_arch->wire_to_arch_ipin_switch_between_dice, + router_opts.custom_3d_sb_fanin_fanout, det_routing_arch->delayless_switch, det_routing_arch->R_minW_nmos, det_routing_arch->R_minW_pmos, @@ -976,6 +1001,7 @@ static void build_rr_graph(const t_graph_type graph_type, const int global_route_switch, const int wire_to_arch_ipin_switch, const int wire_to_pin_between_dice_switch, + const int 
custom_3d_sb_fanin_fanout, const int delayless_switch, const float R_minW_nmos, const float R_minW_pmos, @@ -1228,13 +1254,14 @@ static void build_rr_graph(const t_graph_type graph_type, //is deterministic -- always producing the same RR graph. constexpr unsigned SWITCHPOINT_RNG_SEED = 1; vtr::RandState switchpoint_rand_state = SWITCHPOINT_RNG_SEED; + const auto inter_cluster_prog_rr = device_ctx.inter_cluster_prog_routing_resources; if (is_global_graph) { switch_block_conn = alloc_and_load_switch_block_conn(&nodes_per_chan, SUBSET, 3); } else if (BI_DIRECTIONAL == directionality) { if (sb_type == CUSTOM) { sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y, - grid, + grid, inter_cluster_prog_rr, switchblocks, &nodes_per_chan, directionality, switchpoint_rand_state); } else { @@ -1246,6 +1273,7 @@ static void build_rr_graph(const t_graph_type graph_type, if (sb_type == CUSTOM) { sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y, grid, + inter_cluster_prog_rr, switchblocks, &nodes_per_chan, directionality, switchpoint_rand_state); } else { @@ -1269,6 +1297,19 @@ static void build_rr_graph(const t_graph_type graph_type, } /* END SB LOOKUP */ + /* check whether RR graph need to allocate new nodes for 3D custom switch blocks. + * To avoid wasting memory, the data structures are only allocated if a custom switch block + * is described in the architecture file and we have more than one die in device grid. 
+ */ + if (grid.get_num_layers() > 1 && sb_type == CUSTOM) { + //keep how many nodes each switchblock requires for each x,y location + auto extra_nodes_per_switchblock = get_number_track_to_track_inter_die_conn(sb_conn_map,custom_3d_sb_fanin_fanout, device_ctx.rr_graph_builder); + //allocate new nodes in each switchblocks + alloc_and_load_inter_die_rr_node_indices(device_ctx.rr_graph_builder, &nodes_per_chan, grid, extra_nodes_per_switchblock, &num_rr_nodes); + device_ctx.rr_graph_builder.resize_nodes(num_rr_nodes); + extra_nodes_per_switchblock.clear(); + } + /* START IPIN MAP */ /* Create ipin map lookups */ @@ -1362,6 +1403,7 @@ static void build_rr_graph(const t_graph_type graph_type, nodes_per_chan, wire_to_arch_ipin_switch, wire_to_pin_between_dice_switch, + custom_3d_sb_fanin_fanout, delayless_switch, directionality, &Fc_clipped, @@ -1454,6 +1496,7 @@ static void build_rr_graph(const t_graph_type graph_type, if (!chan_details_x.empty() || !chan_details_y.empty()) { free_chan_details(chan_details_x, chan_details_y); } + if (sb_conn_map) { free_switchblock_permutations(sb_conn_map); sb_conn_map = nullptr; @@ -1461,6 +1504,7 @@ static void build_rr_graph(const t_graph_type graph_type, track_to_pin_lookup_x.clear(); track_to_pin_lookup_y.clear(); + if (clb_to_clb_directs != nullptr) { delete[] clb_to_clb_directs; } @@ -2040,6 +2084,7 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder const t_chan_width& chan_width, const int wire_to_ipin_switch, const int wire_to_pin_between_dice_switch, + const int custom_3d_sb_fanin_fanout, const int delayless_switch, const enum e_directionality directionality, bool* Fc_clipped, @@ -2123,7 +2168,7 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder for (int layer = 0; layer < grid.get_num_layers(); layer++) { for (size_t i = 0; i < grid.width(); ++i) { for (size_t j = 0; j < grid.height(); ++j) { - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { if (BI_DIRECTIONAL == directionality) { 
build_bidir_rr_opins(rr_graph_builder, rr_graph, layer, i, j, side, opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, @@ -2160,23 +2205,45 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder /* Build channels */ VTR_ASSERT(Fs % 3 == 0); - for (int layer = 0; layer < grid.get_num_layers(); ++layer) { - auto& device_ctx = g_vpr_ctx.device(); - /* Skip the current die if architecture file specifies that it doesn't require inter-cluster programmable resource routing */ - if (!device_ctx.inter_cluster_prog_routing_resources.at(layer)) { - continue; - } - for (size_t i = 0; i < grid.width() - 1; ++i) { - for (size_t j = 0; j < grid.height() - 1; ++j) { + /* In case of multi-die FPGA and a custom 3D SB, we keep track of how many 3D connections have been already made for each x,y location */ + vtr::NdMatrix num_of_3d_conns_custom_SB; + + t_rr_edge_info_set des_3d_rr_edges_to_create; + if(grid.get_num_layers() > 1 && sb_conn_map != nullptr){ + num_of_3d_conns_custom_SB.resize(std::array{grid.width(), grid.height()}, 0); + } + + for (size_t i = 0; i < grid.width() - 1; ++i) { + for (size_t j = 0; j < grid.height() - 1; ++j) { + for (int layer = 0; layer < grid.get_num_layers(); ++layer) { + auto &device_ctx = g_vpr_ctx.device(); + /* Skip the current die if architecture file specifies that it doesn't require inter-cluster programmable resource routing */ + if (!device_ctx.inter_cluster_prog_routing_resources.at(layer)) { + continue; + } + /* In multi-die FPGAs with track-to-track connections between layers, we need to load newly added length-0 CHANX nodes + * These extra nodes can be driven from many tracks in the source layer and can drive multiple tracks in the destination layer, + * since these die-crossing connections have more delays. 
+ */ + if (grid.get_num_layers() > 1 && sb_conn_map != nullptr) { + //custom switch block defined in the architecture + VTR_ASSERT(sblock_pattern.empty() && switch_block_conn.empty()); + build_inter_die_custom_sb_rr_chan(rr_graph_builder, layer, i, j, CHANX_COST_INDEX_START, chan_width, + chan_details_x); + } + if (i > 0) { int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.x_list[j]); - build_rr_chan(rr_graph_builder, layer, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START, + build_rr_chan(rr_graph_builder, layer, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, + switch_block_conn, + num_of_3d_conns_custom_SB, CHANX_COST_INDEX_START, chan_width, grid, tracks_per_chan, sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, + rr_edges_to_create, des_3d_rr_edges_to_create, wire_to_ipin_switch, wire_to_pin_between_dice_switch, + custom_3d_sb_fanin_fanout, + delayless_switch, directionality); //Create the actual CHAN->CHAN edges @@ -2188,13 +2255,16 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder } if (j > 0) { int tracks_per_chan = ((is_global_graph) ? 
1 : chan_width.y_list[i]); - build_rr_chan(rr_graph_builder, layer, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START + num_seg_types_x, + build_rr_chan(rr_graph_builder, layer, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, + switch_block_conn, + num_of_3d_conns_custom_SB, CHANX_COST_INDEX_START + num_seg_types_x, chan_width, grid, tracks_per_chan, sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, + rr_edges_to_create, des_3d_rr_edges_to_create, wire_to_ipin_switch, wire_to_pin_between_dice_switch, + custom_3d_sb_fanin_fanout, + delayless_switch, directionality); //Create the actual CHAN->CHAN edges @@ -2207,6 +2277,15 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder } } } + + if(grid.get_num_layers() > 1 && sb_conn_map != nullptr){ + uniquify_edges(des_3d_rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, des_3d_rr_edges_to_create); + num_edges += des_3d_rr_edges_to_create.size(); + des_3d_rr_edges_to_create.clear(); + } + + VTR_LOG("CHAN->CHAN type edge count:%d\n", num_edges); num_edges = 0; std::function update_chan_width = [](t_chan_width*) noexcept {}; @@ -2399,7 +2478,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, std::vector x_offset_vec; std::vector y_offset_vec; std::vector pin_sides_vec; - std::tie(x_offset_vec, y_offset_vec, pin_sides_vec) = get_pin_coordinates(physical_type, pin_num, std::vector(SIDES.begin(), SIDES.end())); + std::tie(x_offset_vec, y_offset_vec, pin_sides_vec) = get_pin_coordinates(physical_type, pin_num, std::vector(TOTAL_2D_SIDES.begin(), TOTAL_2D_SIDES.end())); VTR_ASSERT(!pin_sides_vec.empty()); for (int pin_coord = 0; pin_coord < (int)pin_sides_vec.size(); pin_coord++) { int x_offset = x_offset_vec[pin_coord]; @@ -3047,6 +3126,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, const t_track_to_pin_lookup& track_to_pin_lookup, t_sb_connection_map* sb_conn_map, const vtr::NdMatrix, 3>& 
switch_block_conn, + vtr::NdMatrix& num_of_3d_conns_custom_SB, const int cost_index_offset, const t_chan_width& nodes_per_chan, const DeviceGrid& grid, @@ -3056,8 +3136,11 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, const int wire_to_ipin_switch, const int wire_to_pin_between_dice_switch, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, const enum e_directionality directionality) { /* this function builds both x and y-directed channel segments, so set up our * coordinates based on channel type */ @@ -3137,10 +3220,10 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, } if (to_seg_details->length() > 0) { get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord, - opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, opposite_chan_details, - directionality, + opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, + Fs_per_side, sblock_pattern, num_of_3d_conns_custom_SB, node, rr_edges_to_create, + des_3d_rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, + directionality,custom_3d_sb_fanin_fanout,delayless_switch, switch_block_conn, sb_conn_map); } } @@ -3157,10 +3240,10 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, } if (to_seg_details->length() > 0) { get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord + 1, - opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, opposite_chan_details, - directionality, switch_block_conn, sb_conn_map); + opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, + Fs_per_side, 
sblock_pattern, num_of_3d_conns_custom_SB, node, rr_edges_to_create, + des_3d_rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, + directionality,custom_3d_sb_fanin_fanout, delayless_switch, switch_block_conn, sb_conn_map); } } @@ -3189,10 +3272,10 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, } if (to_seg_details->length() > 0) { get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, target_seg, - chan_type, seg_dimension, max_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, from_chan_details, - directionality, + chan_type, seg_dimension, max_chan_width, grid, + Fs_per_side, sblock_pattern, num_of_3d_conns_custom_SB, node, rr_edges_to_create, + des_3d_rr_edges_to_create, from_seg_details, to_seg_details, from_chan_details, + directionality,custom_3d_sb_fanin_fanout, delayless_switch, switch_block_conn, sb_conn_map); } } @@ -3227,6 +3310,55 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, } } +static void build_inter_die_custom_sb_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, + const int x_coord, + const int y_coord, + const int const_index_offset, + const t_chan_width& nodes_per_chan, + const t_chan_details& chan_details_x) { + auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); + const t_chan_seg_details* seg_details = chan_details_x[x_coord][y_coord].data(); + + /* 3D connections within the switch blocks use some extra length-0 CHANX node to allow a single 3D connection to be driven + * by multiple tracks in the source layer, and drives multiple tracks in the destination layer. + * These nodes has already been added to RRGraph builder, this function will go through all added nodes + * with specific location (layer, x_coord, y_coord) and sets their attributes. 
+ * + * The extra length-0 nodes have the following attributes to make them distinigushable form normal chanx wires (e.g., length-4): + * 1) type: CHANX (could have used either CHANX or CHANY, we used CHANX) + * 2) ptc_num: [max_chan_width : max_chan_width + num_of_3d_connections - 1] + * 3) length: 0 + * 4) xhigh=xlow, yhigh=ylow + * 5) directionality: NONE (neither incremental nor decremental in 2D space) + */ + int start_track = nodes_per_chan.max; + int offset = 0; + + while (true) { //going through allocated nodes until no nodes are found within the RRGraph builder + RRNodeId node = rr_graph_builder.node_lookup().find_node(layer, x_coord, y_coord, CHANX, start_track + offset); + if (node) { + rr_graph_builder.set_node_layer(node, layer); + rr_graph_builder.set_node_coordinates(node, x_coord, y_coord, x_coord, y_coord); + rr_graph_builder.set_node_cost_index(node, RRIndexedDataId( + const_index_offset + seg_details[start_track - 1].index())); + rr_graph_builder.set_node_capacity(node, 1); /* GLOBAL routing handled elsewhere */ + float R = 0; + float C = 0; + rr_graph_builder.set_node_rc_index(node, NodeRCIndex( + find_create_rr_rc_data(R, C, mutable_device_ctx.rr_rc_data))); + + rr_graph_builder.set_node_type(node, CHANX); + rr_graph_builder.set_node_track_num(node, start_track + offset); + rr_graph_builder.set_node_direction(node, Direction::NONE); + + offset++; + } else { + break; + } + } +} + void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) { std::stable_sort(rr_edges_to_create.begin(), rr_edges_to_create.end()); rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end()); @@ -3350,7 +3482,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin size_t(Type->width), //[0..width-1] size_t(Type->height), //[0..height-1] size_t(grid.get_num_layers()), //[0..layer-1] - NUM_SIDES, //[0..NUM_SIDES-1] + NUM_2D_SIDES, //[0..NUM_2D_SIDES-1] size_t(Fc) //[0..Fc-1] }, OPEN); 
//Unconnected @@ -3363,7 +3495,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin size_t(Type->width), //[0..width-1] size_t(Type->height), //[0..height-1] size_t(grid.get_num_layers()), //[0..layer-1] - NUM_SIDES //[0..NUM_SIDES-1] + NUM_2D_SIDES //[0..NUM_2D_SIDES-1] }, 0); @@ -3376,7 +3508,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin size_t(Type->width), //[0..width-1] size_t(Type->height), //[0..height-1] size_t(grid.get_num_layers()), //[0..layer-1] - NUM_SIDES, //[0..NUM_SIDES-1] + NUM_2D_SIDES, //[0..NUM_2D_SIDES-1] size_t(Type->num_pins) * size_t(grid.get_num_layers()) //[0..num_pins * num_layers-1] }, -1); //Defensive coding: Initialize to invalid @@ -3386,7 +3518,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin size_t(Type->width), //[0..width-1] size_t(Type->height), //[0..height-1] size_t(grid.get_num_layers()), //[0..layer-1] - NUM_SIDES //[0..NUM_SIDES-1] + NUM_2D_SIDES //[0..NUM_2D_SIDES-1] }, 0); @@ -3403,7 +3535,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin for (auto type_layer_index : type_layer) { for (int width = 0; width < Type->width; ++width) { for (int height = 0; height < Type->height; ++height) { - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { if (Type->pinloc[width][height][side][pin] == 1) { for (auto i = 0; i < (int)get_layers_connected_to_pin(Type, type_layer_index, pin).size(); i++) { dir_list[width][height][type_layer_index][side][num_dir[width][height][type_layer_index][side]] = pin; @@ -3422,7 +3554,7 @@ static vtr::NdMatrix alloc_and_load_pin_to_seg_type(const e_pin_type pin int num_phys_pins = 0; for (int width = 0; width < Type->width; ++width) { for (int height = 0; height < Type->height; ++height) { - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { num_phys_pins += num_dir[width][height][layer][side]; /* Num. 
physical pins per type */ } } @@ -3676,7 +3808,7 @@ static void load_uniform_connection_block_pattern(vtr::NdMatrix& tracks_ * counts will not get too big. */ std::vector>>> excess_tracks_selected; - excess_tracks_selected.resize(NUM_SIDES); + excess_tracks_selected.resize(NUM_2D_SIDES); for (int i = 0; i < num_phys_pins; ++i) { int width = pin_locations[i].width_offset; @@ -3686,7 +3818,7 @@ static void load_uniform_connection_block_pattern(vtr::NdMatrix& tracks_ max_height = std::max(max_height, height); } - for (int iside = 0; iside < NUM_SIDES; iside++) { + for (int iside = 0; iside < NUM_2D_SIDES; iside++) { excess_tracks_selected[iside].resize(max_width + 1); for (int dx = 0; dx <= max_width; dx++) { @@ -4296,7 +4428,7 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, /* Find matching direct clb-to-clb connections with the same type as current grid location */ if (clb_to_clb_directs[i].from_clb_type == curr_type) { //We are at a valid starting point - if (directs[i].from_side != NUM_SIDES && directs[i].from_side != side) continue; + if (directs[i].from_side != NUM_2D_SIDES && directs[i].from_side != side) continue; //Offset must be in range if (x + directs[i].x_offset < int(device_ctx.grid.width() - 1) @@ -4364,7 +4496,7 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, /* Add new ipin edge to list of edges */ std::vector inodes; - if (directs[i].to_side != NUM_SIDES) { + if (directs[i].to_side != NUM_2D_SIDES) { //Explicit side specified, only create if pin exists on that side RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin, directs[i].to_side); if (inode) { @@ -4509,7 +4641,7 @@ static RRNodeId pick_best_direct_connect_target_rr_node(const RRGraphView& rr_gr float best_dist = std::numeric_limits::infinity(); RRNodeId best_rr = RRNodeId::INVALID(); - for (const e_side& from_side : SIDES) { + for (const e_side& from_side : 
TOTAL_2D_SIDES) { /* Bypass those side where the node does not appear */ if (!rr_graph.is_node_on_specific_side(from_rr, from_side)) { continue; @@ -4520,7 +4652,7 @@ static RRNodeId pick_best_direct_connect_target_rr_node(const RRGraphView& rr_gr float to_dist = std::abs(rr_graph.node_xlow(from_rr) - rr_graph.node_xlow(to_rr)) + std::abs(rr_graph.node_ylow(from_rr) - rr_graph.node_ylow(to_rr)); - for (const e_side& to_side : SIDES) { + for (const e_side& to_side : TOTAL_2D_SIDES) { /* Bypass those side where the node does not appear */ if (!rr_graph.is_node_on_specific_side(to_rr, to_side)) { continue; diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index e2d922baaad..39c1e0ab697 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -93,8 +93,70 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, RRNodeId from_rr_node, t_rr_edge_info_set& rr_edges_to_create); +/** + * @brief creates the RR graph edges corresponding to switch blocks permutation map + * + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * @param tile_x x-coordinate of the switch block + * @param tile_y y-coordinate of the switch block + * @param layer layer-coordinate of the switch block + * @param max_chan_width number of available tracks within the channel + * @param from_side switch block connection source side + * @param from_wire switch block connection source wire index (ptc_num) within the channel + * @param from_rr_node switch block connection source wire RRNode index + * @param to_side switch block connection destination side + * @param to_x switch block connection destination x-coordinate + * @param to_y switch block connection destination y-coordinate + * @param to_chan_type switch block connection destination channel type (CHANX or CHANY) + * @param switch_override used to set the correct switch index for the RR graph edge + * @param sb_conn_map 
switch block permutation map + * @param rr_edges_to_create Total RR edges count + * @param edge_count number of RR edges that this function creates + */ +static void get_switchblocks_edges(RRGraphBuilder& rr_graph_builder, + const int tile_x, + const int tile_y, + const int layer, + const int max_chan_width, + const e_side from_side, + const int from_wire, + RRNodeId from_rr_node, + const e_side to_side, + const int to_x, + const int to_y, + const t_rr_type to_chan_type, + const int switch_override, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, + t_sb_connection_map* sb_conn_map, + vtr::NdMatrix& num_of_3d_conns_custom_SB, + t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, + int& edge_count); + +/* + * @brief Figures out the edges that should connect the given wire segment to the given channel segment, adds these edges to 'rr_edge_to_create' + * + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * @param layer the channel segment layer-coordinate + * @param max_chan_width number of tracks per channel + * @param from_track source track index (ptc_num) within the channel + * @param to_chan destination coordinate (x or y) based on chan type + * @param to_seg destination segment coordinate (x or y) based on chan type + * @param to_chan_type destination wire segment channel type (CHANX or CHANY) + * @param from_side swtich block connection source side + * @param to_side swtich block connection destination side + * @param swtich_override used to set the correct switch index for the RR graph edge + * @param sb_conn_map switch block permutation map, created based on the architecture file + * @param from_rr_node the source wire segment RRNodeID + * @param rr_edges_to_create keeps the created edges + * + * @return the number of edges added to 'rr_edge_to_create' + */ + static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const int 
layer, + const int max_chan_width, const int from_track, const int to_chan, const int to_seg, @@ -102,9 +164,49 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const e_side from_side, const e_side to_side, const int swtich_override, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, t_sb_connection_map* sb_conn_map, + vtr::NdMatrix& num_of_3d_conns_custom_SB, RRNodeId from_rr_node, - t_rr_edge_info_set& rr_edges_to_create); + t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create); + +/** + * @brief checks if a specific switch block edge is crossing any layer to create 3D custom switch blocks + * + * @param src_side switch block edge source side + * @param dest_side switch block edge destination side + * + * @return true if the connection going to another layer, false otherwise. + */ +static bool is_sb_conn_layer_crossing(enum e_side src_side, enum e_side dest_side); + +/** + * @brief finds corresponding RR nodes for a 3D SB edge and fill 3D custom switch block information (offset to correct extra CHANX nodes, source tracks, ..) 
+ * + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * @param x x-coordinate of both src and dest wires + * @param y y-coordinate of both src and dest wires + * @param from_wire from wire index within channel width + * @param from_wire_layer from wire layer-coordinate + * @param from_wire_type from wire type (CHANX/CHANY) + * @param to_wire to wire index within channel width + * @param to_wire_layer to wire layer-coordinate + * @param to_wire_type to wire type (CHANX/CHANY) + * @param curr_switchblocks_offset number of extra nodes that has already been added to the current switch block + * + * @ returns whether both source track RR node and destination RR node exist + */ +static bool check_3d_SB_RRnodes(RRGraphBuilder& rr_graph_builder, + int x, + int y, + int from_wire, + int from_wire_layer, + e_rr_type from_wire_type, + int to_wire, + int to_wire_layer, + e_rr_type to_wire_type); static int vpr_to_phy_track(const int itrack, const int chan_num, @@ -703,7 +805,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, num_conn = 0; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */ - for (e_side side : SIDES) { + for (e_side side : TOTAL_2D_SIDES) { /* Figure out coords of channel segment based on side */ tr_i = ((side == LEFT) ? (i - 1) : i); tr_j = ((side == BOTTOM) ? 
(j - 1) : j); @@ -1134,6 +1236,122 @@ static void load_chan_rr_indices(const int max_chan_width, } } +static bool is_sb_conn_layer_crossing(enum e_side src_side, enum e_side dest_side) { + if (src_side < NUM_2D_SIDES && dest_side < NUM_2D_SIDES) { + return false; + } + + if (src_side == dest_side) { + return false; + } + + return true; +} + +static bool check_3d_SB_RRnodes(RRGraphBuilder& rr_graph_builder, int x, int y, int from_wire, int from_wire_layer, e_rr_type from_wire_type, int to_wire, int to_wire_layer, e_rr_type to_wire_type) { + RRNodeId from_inode = rr_graph_builder.node_lookup().find_node(from_wire_layer, x, y, from_wire_type, from_wire); + RRNodeId to_inode = rr_graph_builder.node_lookup().find_node(to_wire_layer, x, y, to_wire_type, to_wire); + + if (from_inode && to_inode) { + return true; + } + + return false; +} + +vtr::NdMatrix get_number_track_to_track_inter_die_conn(t_sb_connection_map* sb_conn_map, + const int custom_3d_sb_fanin_fanout, + RRGraphBuilder& rr_graph_builder) { + auto& grid_ctx = g_vpr_ctx.device().grid; + vtr::NdMatrix extra_nodes_per_switchblocks; + extra_nodes_per_switchblocks.resize(std::array{grid_ctx.width(), grid_ctx.height()}, 0); + + for (size_t y = 0; y < grid_ctx.height(); y++) { + for (size_t x = 0; x < grid_ctx.width(); x++) { + for (auto layer = 0; layer < grid_ctx.get_num_layers(); layer++) { + int num_of_3d_conn = 0; + for (auto from_side : TOTAL_3D_SIDES) { + for (auto to_side : TOTAL_3D_SIDES) { + if (!is_sb_conn_layer_crossing(from_side, to_side)) { //this connection is not crossing any layer + continue; + } else { + Switchblock_Lookup sb_coord(x, y, layer, from_side, to_side); + if (sb_conn_map->count(sb_coord) > 0) { + std::vector& conn_vector = (*sb_conn_map)[sb_coord]; + for (int iconn = 0; iconn < (int)conn_vector.size(); ++iconn) { + //check if both from_node and to_node exists in the rr-graph + //CHANY -> CHANX connection + if(check_3d_SB_RRnodes(rr_graph_builder, x, y, conn_vector[iconn].from_wire, + 
conn_vector[iconn].from_wire_layer, CHANY, + conn_vector[iconn].to_wire, conn_vector[iconn].to_wire_layer, + CHANX)){ + num_of_3d_conn++; + } + //CHANX -> CHANY connection + if(check_3d_SB_RRnodes(rr_graph_builder, x, y, conn_vector[iconn].from_wire, + conn_vector[iconn].from_wire_layer, CHANX, + conn_vector[iconn].to_wire, conn_vector[iconn].to_wire_layer, + CHANY)){ + num_of_3d_conn++; + } + } + } + } + } + } + extra_nodes_per_switchblocks[x][y] += ((num_of_3d_conn + custom_3d_sb_fanin_fanout - 1)/ custom_3d_sb_fanin_fanout); + } + } + } + return extra_nodes_per_switchblocks; +} + +void alloc_and_load_inter_die_rr_node_indices(RRGraphBuilder& rr_graph_builder, + const t_chan_width* nodes_per_chan, + const DeviceGrid& grid, + const vtr::NdMatrix& extra_nodes_per_switchblock, + int* index) { + /* + * In case of multi-die FPGAs, we add extra nodes (could have used either CHANX or CHANY; we chose to use all CHANX) to + * support inter-die communication coming from switch blocks (connection between two tracks in different layers) + * The extra nodes have the following attribute: + * 1) type = CHANX + * 2) length = 0 (xhigh = xlow, yhigh = ylow) + * 3) ptc = [max_chanx_width:max_chanx_width+number_of_connection-1] + * 4) direction = NONE + */ + auto& device_ctx = g_vpr_ctx.device(); + + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + /* Skip the current die if architecture file specifies that it doesn't have global resource routing */ + if (!device_ctx.inter_cluster_prog_routing_resources.at(layer)) { + continue; + } + for (size_t y = 0; y < grid.height() - 1; ++y) { + for (size_t x = 1; x < grid.width() - 1; ++x) { + //count how many track-to-track connection go from current layer to other layers + int conn_count = extra_nodes_per_switchblock[x][y]; + + //skip if no connection is required + if (conn_count == 0) { + continue; + } + + //reserve extra nodes for inter-die track-to-track connection + rr_graph_builder.node_lookup().reserve_nodes(layer, y, x, 
CHANX, conn_count + nodes_per_chan->max); + for (int rr_node_offset = 0; rr_node_offset < conn_count; rr_node_offset++) { + RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, x, y, CHANX, nodes_per_chan->max + rr_node_offset); + if (!inode) { + inode = RRNodeId(*index); + ++(*index); + rr_graph_builder.node_lookup().add_node(inode, layer, y, x, CHANX, nodes_per_chan->max + rr_node_offset); + } + } + } + } + } +} + /* As the rr_indices builders modify a local copy of indices, use the local copy in the builder * TODO: these building functions should only talk to a RRGraphBuilder object */ @@ -1215,7 +1433,7 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, * Deposit all the sides */ if (wanted_sides.empty()) { - for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { + for (e_side side : TOTAL_2D_SIDES) { wanted_sides.push_back(side); } } @@ -1301,8 +1519,8 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, int* index) { for (int x_tile = root_x; x_tile < (root_x + block_width); x_tile++) { for (int y_tile = root_y; y_tile < (root_y + block_height); y_tile++) { - rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SOURCE, class_num_vec.size(), SIDES[0]); - rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SINK, class_num_vec.size(), SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SOURCE, class_num_vec.size(), TOTAL_2D_SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SINK, class_num_vec.size(), TOTAL_2D_SIDES[0]); } } @@ -1352,9 +1570,9 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.height(), grid.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.height(), grid.width(), rr_type, 
NUM_2D_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.width(), grid.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.width(), grid.height(), rr_type, NUM_2D_SIDES); } } @@ -1459,7 +1677,6 @@ bool verify_rr_node_indices(const DeviceGrid& grid, if (rr_graph.node_type(inode) == CHANX) { VTR_ASSERT_MSG(rr_graph.node_ylow(inode) == rr_graph.node_yhigh(inode), "CHANX should be horizontal"); - if (y != rr_graph.node_ylow(inode)) { VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", rr_graph.node_ylow(inode), @@ -1533,8 +1750,8 @@ bool verify_rr_node_indices(const DeviceGrid& grid, * This check code should be invalid * if (rr_node.side() != side) { * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%s/%s): %s", - * SIDE_STRING[rr_node.side()], - * SIDE_STRING[side], + * TOTAL_2D_SIDE_STRINGS[rr_node.side()], + * TOTAL_2D_SIDE_STRINGS[side], * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); * } else { * VTR_ASSERT(rr_node.side() == side); @@ -1702,12 +1919,16 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, const DeviceGrid& grid, const int Fs_per_side, t_sblock_pattern& sblock_pattern, + vtr::NdMatrix& num_of_3d_conns_custom_SB, RRNodeId from_rr_node, t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, const t_chan_seg_details* from_seg_details, const t_chan_seg_details* to_seg_details, const t_chan_details& to_chan_details, const enum e_directionality directionality, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, const vtr::NdMatrix, 3>& switch_block_conn, t_sb_connection_map* sb_conn_map) { int to_chan, to_sb; @@ -1837,10 +2058,10 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg < end_sb_seg) { if (custom_switch_block) { if (Direction::DEC == 
from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, max_chan_width, from_track, to_chan, to_seg, to_type, from_side_a, to_side, - switch_override, - sb_conn_map, from_rr_node, rr_edges_to_create); + switch_override, custom_3d_sb_fanin_fanout, delayless_switch, + sb_conn_map, num_of_3d_conns_custom_SB, from_rr_node, rr_edges_to_create, des_3d_rr_edges_to_create); } } else { if (BI_DIRECTIONAL == directionality) { @@ -1875,10 +2096,10 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg > start_sb_seg) { if (custom_switch_block) { if (Direction::INC == from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, max_chan_width, from_track, to_chan, to_seg, to_type, from_side_b, to_side, - switch_override, - sb_conn_map, from_rr_node, rr_edges_to_create); + switch_override,custom_3d_sb_fanin_fanout, delayless_switch, + sb_conn_map, num_of_3d_conns_custom_SB, from_rr_node, rr_edges_to_create, des_3d_rr_edges_to_create); } } else { if (BI_DIRECTIONAL == directionality) { @@ -2010,13 +2231,127 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, return num_conn; } -/* Figures out the edges that should connect the given wire segment to the given - * channel segment, adds these edges to 'edge_list' and returns the number of - * edges added . - * See route/build_switchblocks.c for a detailed description of how the switch block - * connection map sb_conn_map is generated. 
*/ +static void get_switchblocks_edges(RRGraphBuilder& rr_graph_builder, + const int tile_x, + const int tile_y, + const int layer, + const int max_chan_width, + const e_side from_side, + const int from_wire, + RRNodeId from_rr_node, + const e_side to_side, + const int to_x, + const int to_y, + const t_rr_type to_chan_type, + const int switch_override, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, + t_sb_connection_map* sb_conn_map, + vtr::NdMatrix& num_of_3d_conns_custom_SB, + t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, + int& edge_count) { + auto& device_ctx = g_vpr_ctx.device(); + + /* get coordinate to index into the SB map */ + Switchblock_Lookup sb_coord(tile_x, tile_y, layer, from_side, to_side); + if (sb_conn_map->count(sb_coord) > 0) { + /* get reference to the connections vector which lists all destination wires for a given source wire + * at a specific coordinate sb_coord */ + std::vector& conn_vector = (*sb_conn_map)[sb_coord]; + + /* go through the connections... 
*/ + for (int iconn = 0; iconn < (int)conn_vector.size(); ++iconn) { + if (conn_vector.at(iconn).from_wire != from_wire) continue; + + int to_wire = conn_vector.at(iconn).to_wire; + int to_layer = conn_vector.at(iconn).to_wire_layer; + /* Get the index of the switch connecting the two wires */ + int src_switch = conn_vector[iconn].switch_ind; + /* Get the index of the switch connecting the two wires in two layers */ + int src_switch_betwen_layers = conn_vector[iconn].switch_ind_between_layers; + + if (to_layer == layer) { //track-to-track connection within the same layer + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_layer, to_x, to_y, to_chan_type, to_wire); + + if (!to_node) { + continue; + } + //Apply any switch overrides + if (should_apply_switch_override(switch_override)) { + src_switch = switch_override; + } + + rr_edges_to_create.emplace_back(from_rr_node, to_node, src_switch, false); + ++edge_count; + + if (device_ctx.arch_switch_inf[src_switch].directionality() == BI_DIRECTIONAL) { + //Add reverse edge since bi-directional + rr_edges_to_create.emplace_back(to_node, from_rr_node, src_switch, false); + ++edge_count; + } + } else { //track-to_track connection crossing layer + VTR_ASSERT(to_layer != layer); + //check if current connection is valid, since switch block pattern is very general, + //we might see invalid layer in connection, so we just skip those + if ((layer < 0 || layer >= device_ctx.grid.get_num_layers()) || (to_layer < 0 || to_layer >= device_ctx.grid.get_num_layers())) { + continue; + } + + if (tile_x != to_x || tile_y != to_y) { + continue; + } + + /* + * In order to connect two tracks in different layers, we need to follow these three steps: + * 1) connect "from_tracks" to extra "chanx" node in the same switch blocks + * 2) connect extra "chanx" node located in from_layer to another extra "chanx" node located in to_layer + * 3) connect "chanx" node located in to_layer to "to_track" + * + * +-------------+ +-------------+ 
+--------------+ +--------------+ + * | from_wire | -----> | extra_chanx | ------> | extra_chanx | ------> | to_wire | + * | (src_layer) | | (src_layer) | | (dest_layer) | | (dest_layer) | + * +-------------+ +-------------+ +--------------+ +--------------+ + * + * */ + int offset = num_of_3d_conns_custom_SB[tile_x][tile_y] / custom_3d_sb_fanin_fanout; + RRNodeId track_to_chanx_node = rr_graph_builder.node_lookup().find_node(layer, tile_x, tile_y, CHANX, max_chan_width + offset); + RRNodeId diff_layer_chanx_node = rr_graph_builder.node_lookup().find_node(to_layer, tile_x, tile_y, CHANX, max_chan_width + offset); + RRNodeId chanx_to_track_node = rr_graph_builder.node_lookup().find_node(to_layer, to_x, to_y, to_chan_type, to_wire); + + if (!track_to_chanx_node || !diff_layer_chanx_node || !chanx_to_track_node) { + continue; + } + + //Apply any switch overrides + if (should_apply_switch_override(switch_override)) { + src_switch = switch_override; + } + + //add edge between source node at from layer to intermediate node + rr_edges_to_create.emplace_back(from_rr_node, track_to_chanx_node, delayless_switch, false); + ++edge_count; + + //add edge between intermediate node to destination node at to layer + //might add the same edge more than once + des_3d_rr_edges_to_create.emplace_back(diff_layer_chanx_node, chanx_to_track_node, src_switch_betwen_layers, false); + ++edge_count; + + //we only add the following edge between intermediate nodes once for the first 3D connection for each pair of intermediate nodes + if (num_of_3d_conns_custom_SB[tile_x][tile_y] % custom_3d_sb_fanin_fanout == 0) { + rr_edges_to_create.emplace_back(track_to_chanx_node, diff_layer_chanx_node, delayless_switch, false); + ++edge_count; + } + + num_of_3d_conns_custom_SB[tile_x][tile_y]++; + } + } + } +} + static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const int layer, + const int max_chan_width, const int from_wire, const int to_chan, const int to_seg, @@ -2024,9 +2359,13 @@ 
static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const e_side from_side, const e_side to_side, const int switch_override, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, t_sb_connection_map* sb_conn_map, + vtr::NdMatrix& num_of_3d_conns_custom_SB, RRNodeId from_rr_node, - t_rr_edge_info_set& rr_edges_to_create) { + t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create) { int edge_count = 0; int to_x, to_y; int tile_x, tile_y; @@ -2047,45 +2386,49 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, } } - /* get coordinate to index into the SB map */ - Switchblock_Lookup sb_coord(tile_x, tile_y, from_side, to_side); - if (sb_conn_map->count(sb_coord) > 0) { - /* get reference to the connections vector which lists all destination wires for a given source wire - * at a specific coordinate sb_coord */ - std::vector& conn_vector = (*sb_conn_map)[sb_coord]; - - /* go through the connections... */ - for (int iconn = 0; iconn < (int)conn_vector.size(); ++iconn) { - if (conn_vector.at(iconn).from_wire != from_wire) continue; - - int to_wire = conn_vector.at(iconn).to_wire; - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_chan_type, to_wire); - - if (!to_node) { - continue; - } - - /* Get the index of the switch connecting the two wires */ - int src_switch = conn_vector[iconn].switch_ind; - - //Apply any switch overrides - if (should_apply_switch_override(switch_override)) { - src_switch = switch_override; - } - - rr_edges_to_create.emplace_back(from_rr_node, to_node, src_switch, false); - ++edge_count; - - auto& device_ctx = g_vpr_ctx.device(); - - if (device_ctx.arch_switch_inf[src_switch].directionality() == BI_DIRECTIONAL) { - //Add reverse edge since bi-directional - rr_edges_to_create.emplace_back(to_node, from_rr_node, src_switch, false); - ++edge_count; - } - } - } else { - /* specified sb_conn_map entry does not exist -- do nothing */ + 
get_switchblocks_edges(rr_graph_builder, + tile_x, + tile_y, + layer, + max_chan_width, + from_side, + from_wire, + from_rr_node, + to_side, + to_x, + to_y, + to_chan_type, + switch_override, + custom_3d_sb_fanin_fanout, + delayless_switch, + sb_conn_map, + num_of_3d_conns_custom_SB, + rr_edges_to_create, + des_3d_rr_edges_to_create, + edge_count); + + //check sb_conn_map for connections between two layers + for (e_side to_another_die_side : {ABOVE, UNDER}) { + get_switchblocks_edges(rr_graph_builder, + tile_x, + tile_y, + layer, + max_chan_width, + from_side, + from_wire, + from_rr_node, + to_another_die_side, + to_x, + to_y, + to_chan_type, + switch_override, + custom_3d_sb_fanin_fanout, + delayless_switch, + sb_conn_map, + num_of_3d_conns_custom_SB, + rr_edges_to_create, + des_3d_rr_edges_to_create, + edge_count); } return edge_count; } @@ -2411,15 +2754,15 @@ void load_sblock_pattern_lookup(const int i, /* SB's range from (0, 0) to (grid.width() - 2, grid.height() - 2) */ /* First find all four sides' incoming wires */ - static_assert(NUM_SIDES == 4, "Should be 4 sides"); - std::array, NUM_SIDES> wire_mux_on_track; - std::array, NUM_SIDES> incoming_wire_label; - int num_incoming_wires[NUM_SIDES]; - int num_ending_wires[NUM_SIDES]; - int num_wire_muxes[NUM_SIDES]; + static_assert(NUM_2D_SIDES == 4, "Should be 4 sides"); + std::array, NUM_2D_SIDES> wire_mux_on_track; + std::array, NUM_2D_SIDES> incoming_wire_label; + int num_incoming_wires[NUM_2D_SIDES]; + int num_ending_wires[NUM_2D_SIDES]; + int num_wire_muxes[NUM_2D_SIDES]; /* "Label" the wires around the switch block by connectivity. */ - for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side side : TOTAL_2D_SIDES) { /* Assume the channel segment doesn't exist. 
*/ wire_mux_on_track[side].clear(); incoming_wire_label[side].clear(); @@ -2493,7 +2836,7 @@ void load_sblock_pattern_lookup(const int i, false, wire_mux_on_track[side], &num_wire_muxes[side], &dummy); } - for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side to_side : TOTAL_2D_SIDES) { /* Can't do anything if no muxes on this side. */ if (num_wire_muxes[to_side] == 0) continue; @@ -2756,7 +3099,7 @@ static void label_incoming_wires(const int chan_num, static int find_label_of_track(const std::vector& wire_mux_on_track, int num_wire_muxes, int from_track) { - /* Returns the index/label in array wire_mux_on_track whose entry equals from_track. If none are + /* Returns the index/label in array wire_mux_on_track whose entry equals from_track. If none are * found, then returns the index of the entry whose value is the largest */ int i_label = -1; int max_track = -1; diff --git a/vpr/src/route/rr_graph2.h b/vpr/src/route/rr_graph2.h index b1651f6ccda..348e5633708 100644 --- a/vpr/src/route/rr_graph2.h +++ b/vpr/src/route/rr_graph2.h @@ -29,6 +29,22 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, const t_chan_details& chan_details_y, bool is_flat); +/** + * @brief allocates extra nodes within the RR graph to support 3D custom switch blocks for multi-die FPGAs + * + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * @param nodes_per_chan number of tracks per channel (x, y) + * @param grid device grid + * @param extra_nodes_per_switchblock keeps how many extra length-0 CHANX nodes are required for each unique (x,y) location within the grid. + * The number of these extra nodes is exactly the same for all layers. Hence, we only keep it for one layer.
([0..grid.width-1][0..grid.height-1]) + * @param index RRNodeId that should be assigned to add a new RR node to the RR graph + */ +void alloc_and_load_inter_die_rr_node_indices(RRGraphBuilder& rr_graph_builder, + const t_chan_width* nodes_per_chan, + const DeviceGrid& grid, + const vtr::NdMatrix& extra_nodes_per_switchblock, + int* index); + void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_tile, int layer, @@ -61,7 +77,18 @@ int get_rr_node_index(const t_rr_node_indices& L_rr_node_indices, int y, t_rr_type rr_type, int ptc, - e_side side = NUM_SIDES); + e_side side = NUM_2D_SIDES); +/** + * @brief goes through 3D custom switch blocks and counts how many connections are crossing dice for each switch block. + * + * @param sb_conn_map switch block permutation map + * @param rr_graph_builder RRGraphBuilder data structure which allows data modification on a routing resource graph + * + * @return number of die-crossing connections for each unique (x, y) location within the grid ([0..grid.width-1][0..grid.height-1]) + */ +vtr::NdMatrix get_number_track_to_track_inter_die_conn(t_sb_connection_map* sb_conn_map, + const int custom_3d_sb_fanin_fanout, + RRGraphBuilder& rr_graph_builder); int find_average_rr_node_index(int device_width, int device_height, @@ -182,12 +209,16 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, const DeviceGrid& grid, const int Fs_per_side, t_sblock_pattern& sblock_pattern, + vtr::NdMatrix& num_of_3d_conns_custom_SB, RRNodeId from_rr_node, t_rr_edge_info_set& rr_edges_to_create, + t_rr_edge_info_set& des_3d_rr_edges_to_create, const t_chan_seg_details* from_seg_details, const t_chan_seg_details* to_seg_details, const t_chan_details& to_chan_details, const enum e_directionality directionality, + const int custom_3d_sb_fanin_fanout, + const int delayless_switch, const vtr::NdMatrix, 3>& switch_block_conn, t_sb_connection_map* sb_conn_map); diff --git
a/vpr/src/route/rr_graph_sbox.cpp b/vpr/src/route/rr_graph_sbox.cpp index 22a4db0c4be..0e57727f056 100644 --- a/vpr/src/route/rr_graph_sbox.cpp +++ b/vpr/src/route/rr_graph_sbox.cpp @@ -24,8 +24,8 @@ * BOTTOM (CHANY) */ /* [0..3][0..3][0..nodes_per_chan-1]. Structure below is indexed as: * - * [from_side][to_side][from_track]. That yields an integer vector (ivec) * - * of the tracks to which from_track connects in the proper to_location. * + * [from_side][to_side][from_track]. That yields an integer vector (ivec) * + * of the tracks to which from_track connects in the proper to_location. * * For simple switch boxes this is overkill, but it will allow complicated * * switch boxes with Fs > 3, etc. without trouble. */ @@ -40,8 +40,8 @@ vtr::NdMatrix, 3> alloc_and_load_switch_block_conn(t_chan_width vtr::NdMatrix, 3> switch_block_conn({4, 4, (size_t)nodes_per_chan->max}); - for (e_side from_side : {TOP, RIGHT, BOTTOM, LEFT}) { - for (e_side to_side : {TOP, RIGHT, BOTTOM, LEFT}) { + for (e_side from_side : TOTAL_2D_SIDES) { + for (e_side to_side : TOTAL_2D_SIDES) { int from_chan_width = (from_side == TOP || from_side == BOTTOM) ? nodes_per_chan->y_max : nodes_per_chan->x_max; int to_chan_width = (to_side == TOP || to_side == BOTTOM) ? nodes_per_chan->y_max : nodes_per_chan->x_max; for (int from_track = 0; from_track < from_chan_width; from_track++) { @@ -95,7 +95,7 @@ int get_simple_switch_block_track(const enum e_side from_side, const enum e_switch_block_type switch_block_type, const int from_chan_width, const int to_chan_width) { - /* This routine returns the track number to which the from_track should * + /* This routine returns the track number to which the from_track should * * connect. It supports three simple, Fs = 3, switch blocks. */ int to_track = SBOX_ERROR; /* Can check to see if it's not set later.
*/ diff --git a/vpr/src/route/rr_graph_timing_params.cpp b/vpr/src/route/rr_graph_timing_params.cpp index f89b8aba9a0..ae316cef2c7 100644 --- a/vpr/src/route/rr_graph_timing_params.cpp +++ b/vpr/src/route/rr_graph_timing_params.cpp @@ -58,7 +58,7 @@ void add_rr_graph_C_from_switches(float C_ipin_cblock) { from_rr_type = rr_graph.node_type(rr_id); - if (from_rr_type == CHANX || from_rr_type == CHANY) { + if ((from_rr_type == CHANX || from_rr_type == CHANY)) { for (t_edge_size iedge = 0; iedge < rr_graph.num_edges(rr_id); iedge++) { to_node = size_t(rr_graph.edge_sink_node(rr_id, iedge)); to_rr_type = rr_graph.node_type(RRNodeId(to_node)); diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 0285a42a5da..98620b56009 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -4,6 +4,9 @@ #include #include +#include "pack_types.h" +#include "prepack.h" +#include "vpr_context.h" #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_memory.h" @@ -17,7 +20,6 @@ #include "cluster_placement.h" #include "device_grid.h" #include "user_route_constraints.h" -#include "re_cluster_util.h" #include "placer_state.h" #include "grid_block.h" @@ -1124,14 +1126,11 @@ const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLo return get_pb_graph_node_pin_from_model_port_pin(model_port, ipin, pb_gnode); } -t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, + int ipin) { int i, count; - const t_pb_type* pb_type; - t_pb_graph_node* pb_graph_node; - auto& cluster_ctx = g_vpr_ctx.clustering(); - pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; - pb_type = pb_graph_node->pb_type; + const t_pb_type* pb_type = pb_graph_node->pb_type; /* If this is post-placed, then the ipin may have been shuffled up by the z * num_pins, * bring it back down to 0..num_pins-1 range for easier analysis 
*/ @@ -1169,6 +1168,13 @@ t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int return nullptr; } +t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + t_pb_graph_node* pb_graph_node = cluster_ctx.clb_nlist.block_pb(iblock)->pb_graph_node; + return get_pb_graph_node_pin_from_pb_graph_node(pb_graph_node, ipin); +} + const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name) { const t_pb_graph_pin* gpin = find_pb_graph_pin(pb_gnode, port_name, 0); @@ -1363,12 +1369,13 @@ std::tuple get_cluster_internal_class_pairs(const AtomLookup& atom_lookup, ClusterBlockId cluster_block_id) { + const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); std::vector class_num_vec; auto [physical_tile, sub_tile, rel_cap, logical_block] = get_cluster_blk_physical_spec(cluster_block_id); class_num_vec.reserve(physical_tile->primitive_class_inf.size()); - const auto& cluster_atoms = cluster_to_atoms(cluster_block_id); + const auto& cluster_atoms = cluster_ctx.atoms_lookup[cluster_block_id]; for (AtomBlockId atom_blk_id : cluster_atoms) { auto atom_pb_graph_node = atom_lookup.atom_pb_graph_node(atom_blk_id); auto class_range = get_pb_graph_node_class_physical_range(physical_tile, @@ -1532,7 +1539,7 @@ void free_pb(t_pb* pb) { free_pb_stats(pb); } -void revalid_molecules(const t_pb* pb) { +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker) { const t_pb_type* pb_type = pb->pb_graph_node->pb_type; if (pb_type->blif_model == nullptr) { @@ -1540,7 +1547,7 @@ void revalid_molecules(const t_pb* pb) { for (int i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != nullptr; i++) { for (int j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != nullptr; j++) { if (pb->child_pbs[i][j].name != nullptr || pb->child_pbs[i][j].child_pbs != nullptr) { - revalid_molecules(&pb->child_pbs[i][j]); 
+ revalid_molecules(&pb->child_pbs[i][j], prepacker); } } } @@ -1556,7 +1563,7 @@ void revalid_molecules(const t_pb* pb) { atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); atom_ctx.lookup.set_atom_pb(blk_id, nullptr); - t_pack_molecule* cur_molecule = atom_ctx.prepacker.get_atom_molecule(blk_id); + t_pack_molecule* cur_molecule = prepacker.get_atom_molecule(blk_id); if (cur_molecule->valid == false) { int i; for (i = 0; i < get_array_size_of_molecule(cur_molecule); i++) { @@ -2201,7 +2208,7 @@ RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, std::vector x_offset; std::vector y_offset; std::vector pin_sides; - std::tie(x_offset, y_offset, pin_sides) = get_pin_coordinates(physical_tile, pin_physical_num, std::vector(SIDES.begin(), SIDES.end())); + std::tie(x_offset, y_offset, pin_sides) = get_pin_coordinates(physical_tile, pin_physical_num, std::vector(TOTAL_2D_SIDES.begin(), TOTAL_2D_SIDES.end())); VTR_ASSERT(!x_offset.empty()); RRNodeId node_id = RRNodeId::INVALID(); for (int coord_idx = 0; coord_idx < (int)pin_sides.size(); coord_idx++) { diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 24da4489b6b..9f08dcc0d2b 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -17,6 +17,7 @@ class DeviceGrid; class PlacerState; +class Prepacker; const t_model* find_model(const t_model* models, const std::string& name, bool required = true); const t_model_ports* find_model_port(const t_model* model, const std::string& name, bool required = true); @@ -192,6 +193,9 @@ int get_max_nets_in_pb_type(const t_pb_type* pb_type); bool primitive_type_feasible(AtomBlockId blk_id, const t_pb_type* cur_pb_type); t_pb_graph_pin* get_pb_graph_node_pin_from_model_port_pin(const t_model_ports* model_port, const int model_pin, const t_pb_graph_node* pb_graph_node); const t_pb_graph_pin* find_pb_graph_pin(const AtomNetlist& netlist, const AtomLookup& netlist_lookup, const AtomPinId pin_id); +/// @brief Gets the 
pb_graph_node pin at the given pin index for the given +/// pb_graph_node. +t_pb_graph_pin* get_pb_graph_node_pin_from_pb_graph_node(t_pb_graph_node* pb_graph_node, int ipin); t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int ipin); vtr::vector alloc_and_load_pin_id_to_pb_mapping(); void free_pin_id_to_pb_mapping(vtr::vector& pin_id_to_pb_mapping); @@ -218,7 +222,7 @@ void parse_direct_pin_name(char* src_string, int line, int* start_pin_index, int void free_pb_stats(t_pb* pb); void free_pb(t_pb* pb); -void revalid_molecules(const t_pb* pb); +void revalid_molecules(const t_pb* pb, const Prepacker& prepacker); void print_switch_usage(); void print_usage_by_wire_length(); diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 1bc208bf3ba..c2ac5329a26 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -191,9 +191,6 @@ TEST_CASE("connection_router", "[vpr]") { free_routing_structs(); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index be4bd45f045..a8344fa79d4 100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -35,10 +35,6 @@ void do_vpr_flow(const char* input_unc_opt, const char* output_unc_opt) { free_routing_structs(); vpr_free_all(arch, vpr_setup); - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - atom_ctx.prepacker.reset(); - REQUIRE(flow_succeeded == true); } diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp index da0b4c8b21c..0e92311b5c2 100644 --- a/vpr/test/test_vpr.cpp +++ b/vpr/test/test_vpr.cpp @@ -169,9 +169,6 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { echo_file_name, false); vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } REQUIRE(src_inode != -1); @@ -233,9 +230,6 @@ 
TEST_CASE("read_rr_graph_metadata", "[vpr]") { CHECK_THAT(value->as_string().get(&arch.strings), Equals("test edge")); } vpr_free_all(arch, vpr_setup); - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - atom_ctx.prepacker.reset(); } } // namespace diff --git a/vtr_flow/arch/multi_die/stratixiv_3d/3d_SB_inter_die_stratixiv_arch.timing.xml b/vtr_flow/arch/multi_die/stratixiv_3d/3d_SB_inter_die_stratixiv_arch.timing.xml new file mode 100644 index 00000000000..d671996b6f4 --- /dev/null +++ b/vtr_flow/arch/multi_die/stratixiv_3d/3d_SB_inter_die_stratixiv_arch.timing.xml @@ -0,0 +1,48370 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.core_out[0:19] io.core_in[0:24] io.clk[0:2] + io.core_out[20:39] io.core_in[24:49] io.clk[3:4] + io.core_out[0:19] io.core_in[0:24] io.clk[0:2] + io.core_in[24:49] io.clk[3:4] io.core_out[20:39] + + + + + + + + + + + + + + + PLL.out_clock PLL.out_signal PLL.in_signal PLL.in_clock + PLL.out_clock PLL.out_signal PLL.in_signal PLL.in_clock + PLL.out_clock PLL.out_signal PLL.in_signal PLL.in_clock + PLL.out_clock PLL.out_signal PLL.in_signal PLL.in_clock + + + + + + + + + + + + + + + + + + + + + + + + + + + LAB.data_out[0:19] LAB.data_in[0:39] 
LAB.control_in[0:3] LAB.clk[0] + LAB.data_out[20:39] LAB.data_in[40:79] LAB.control_in[4:6] LAB.clk[1] + LAB.data_out LAB.data_in LAB.control_in LAB.clk LAB.cin LAB.sharein + LAB.cout LAB.shareout + + + + + + + + + + + + + + + + + + + + + + + + + + + DSP.data_out_top[17:0] DSP.scan_a_out[1:0] DSP.data_in[35:0] DSP.control_in[2:0] DSP.scan_a_in[1:0] DSP.clk[0] + DSP.data_out_top[35:18] DSP.scan_a_out[3:2] DSP.signal_out[0:0] DSP.data_in[71:36] DSP.control_in[4:3] DSP.scan_a_in[4:2] + DSP.data_out_top[53:36] DSP.scan_a_out[5:4] DSP.signal_out[1:1] DSP.data_in[107:72] DSP.control_in[6:5] DSP.scan_a_in[6:5] DSP.clk[1] + DSP.data_out_top[71:54] DSP.scan_a_out[7:6] DSP.signal_out[2:2] DSP.data_in[144:108] DSP.control_in[8:6] DSP.scan_a_in[8:7] + DSP.data_out_top[35:0] DSP.scan_a_out[3:0] DSP.signal_out[0:0] DSP.data_in[143:72] DSP.control_in[3:0] DSP.scan_a_in[3:0] DSP.clk[0] DSP.chain_in + DSP.data_out_top[71:36] DSP.scan_a_out[9:4] DSP.signal_out[2:1] DSP.data_in[71:0] DSP.control_in[8:4] DSP.scan_a_in[8:4] DSP.clk[1] + + + + DSP.data_out_bot[17:0] DSP.scan_a_out[9:8] DSP.data_in[179:145] DSP.control_in[11:9] DSP.scan_a_in[10:9] DSP.clk[2] + DSP.data_out_bot[35:18] DSP.scan_a_out[11:10] DSP.signal_out[3:3] DSP.data_in[215:180] DSP.control_in[13:12] DSP.scan_a_in[12:11] + DSP.data_in[251:216] DSP.control_in[15:14] DSP.scan_a_in[14:13] DSP.data_out_bot[53:36] DSP.scan_a_out[13:12] DSP.signal_out[4:4] + DSP.data_in[287:252] DSP.control_in[20:16] DSP.scan_a_in[17:15] DSP.clk[3] DSP.data_out_bot[71:54] DSP.scan_a_out[15:14] DSP.signal_out[5:5] + DSP.data_in[287:215] DSP.control_in[13:9] DSP.scan_a_in[13:9] DSP.clk[2] DSP.data_out_bot[35:0] DSP.scan_a_out[12:10] DSP.signal_out[3:3] + DSP.data_in[214:144] DSP.control_in[20:16] DSP.scan_a_in[17:14] DSP.clk[3] DSP.data_out_bot[71:36] DSP.scan_a_out[17:13] DSP.signal_out[5:4] + + DSP.chain_out + + + + + + + + + + + + + + + + + + M9K.data_out[17:0] M9K.control_out[1:0] M9K.data_addr_control_in[51:0] M9K.clk_in[0] + 
M9K.data_out[35:18] M9K.control_out[2:2] M9K.data_addr_control_in[103:52] M9K.clk_in[1] + M9K.data_out M9K.control_out M9K.data_addr_control_in M9K.clk_in + + + + + + + + + + + + + + + + + + + M144K.data_out[7:0] M144K.data_addr_control_in[24:0] + M144K.data_out[15:8] M144K.data_addr_control_in[50:25] + M144K.data_out[22:16] M144K.data_addr_control_in[76:51] + M144K.data_out[29:23] + M144K.data_out[36:30] M144K.control_out[0] M144K.data_addr_control_in[128:103] M144K.clk_in[0] + M144K.data_out[43:37] M144K.control_out[1] M144K.data_addr_control_in[154:129] + M144K.data_out[51:44] M144K.data_addr_control_in[180:155] + M144K.data_out[59:52] M144K.data_addr_control_in[206:181] + M144K.data_out[67:60] M144K.data_addr_control_in[232:207] + M144K.data_out[75:68] M144K.data_addr_control_in[258:233] + M144K.data_out[82:76] M144K.data_addr_control_in[284:259] + M144K.data_out[89:83] M144K.control_out[2] M144K.data_addr_control_in[310:285] + M144K.data_out[96:90] M144K.data_addr_control_in[336:311] M144K.clk_in[1] + M144K.data_out[103:97] M144K.data_addr_control_in[362:337] + M144K.data_out[111:104] M144K.data_addr_control_in[388:363] + M144K.data_out[119:112] M144K.data_addr_control_in[415:389] + M144K.data_out[14:0] M144K.data_addr_control_in[51:0] + M144K.data_out[29:15] M144K.data_addr_control_in[103:52] + M144K.data_out[44:30] M144K.control_out[0] M144K.data_addr_control_in[155:104] + M144K.data_out[59:45] M144K.control_out[1] M144K.data_addr_control_in[207:156] M144K.clk_in[0] + M144K.data_addr_control_in[259:208] M144K.clk_in[1] M144K.data_out[74:60] M144K.control_out[2] + M144K.data_out[89:75] M144K.data_addr_control_in[311:260] + M144K.data_out[104:90] M144K.data_addr_control_in[363:312] + M144K.data_out[119:105] M144K.data_addr_control_in[415:364] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 + + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + 193e-12 + 193e-12 + 193e-12 + 193e-12 + 193e-12 + 193e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vtr_flow/arch/noc/mesh_noc_topology/mlp_benchmarks.stratixiv_arch.timing_with_a_embedded_4x4_mesh_noc_topology.xml b/vtr_flow/arch/noc/mesh_noc_topology/mlp_benchmarks.stratixiv_arch.timing_with_a_embedded_4x4_mesh_noc_topology.xml index 456404fe457..80b185e17cb 100644 --- a/vtr_flow/arch/noc/mesh_noc_topology/mlp_benchmarks.stratixiv_arch.timing_with_a_embedded_4x4_mesh_noc_topology.xml +++ b/vtr_flow/arch/noc/mesh_noc_topology/mlp_benchmarks.stratixiv_arch.timing_with_a_embedded_4x4_mesh_noc_topology.xml @@ -48449,7 +48449,7 @@ - + diff --git a/vtr_flow/arch/timing/k6_N10_40nm.xml b/vtr_flow/arch/timing/k6_N10_40nm.xml index 711e4825539..07e017ad7d4 100644 --- a/vtr_flow/arch/timing/k6_N10_40nm.xml +++ b/vtr_flow/arch/timing/k6_N10_40nm.xml @@ -4,27 +4,38 @@ - 40 nm technology - General purpose logic block: K = 6, N = 10 - - Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1 + - Routing architecture: L = 4, fc_in = 0.15, fc_out = 0.15 + - Unidirectional (mux-based) routing + Details on Modelling: Based on flagship k6_frac_N10_mem32K_40nm.xml architecture. This architecture has no fracturable LUTs nor any heterogeneous blocks. - + The delays and areas are based on a mix of values from commercial 40 nm + FPGAs with a comparable architecture and 40 nm interconnect and + transistor models. 
Authors: Jason Luu, Jeff Goeders, Vaughn Betz --> + + + @@ -34,7 +45,15 @@ - + + io.outpad io.inpad io.clock io.outpad io.inpad io.clock @@ -43,21 +62,42 @@ - + + + + + + + + - + - - + + + @@ -67,22 +107,11 @@ + + + - + + + + mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. + --> + Wires of this pitch will fit over a 90 nm + high logic tile (which is about the height of a Stratix IV logic tile). + I'm using a tile length of 90 nm, corresponding to the length of a Stratix IV tile if it were square. + length below is in units of logic blocks, and Rmetal and Cmetal are + per logic block passed, so wire delay adapts automatically if you change the + length=? value. --> + + 1 1 1 1 1 1 1 1 1 + + + - @@ -156,25 +197,17 @@ - - - + + + - + @@ -198,22 +231,16 @@ - + 82e-12 173e-12 261e-12 263e-12 398e-12 397e-12 - --> - - 261e-12 - 261e-12 - 261e-12 - 261e-12 - 261e-12 - 261e-12 @@ -224,6 +251,10 @@ + + @@ -262,15 +293,12 @@ - - - diff --git a/vtr_flow/arch/timing/k6_N10_sparse_crossbar_40nm.xml b/vtr_flow/arch/timing/k6_N10_sparse_crossbar_40nm.xml new file mode 100644 index 00000000000..2d6dfad7a7d --- /dev/null +++ b/vtr_flow/arch/timing/k6_N10_sparse_crossbar_40nm.xml @@ -0,0 +1,348 @@ + + + + + + + + + + + + + + + + + + + + + io.outpad io.inpad io.clock + io.outpad io.inpad io.clock + io.outpad io.inpad io.clock + io.outpad io.inpad io.clock + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 82e-12 + 173e-12 + 261e-12 + 263e-12 + 398e-12 + 397e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/vpr_reg_mcnc_equiv/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/vpr_reg_mcnc_equiv/config/golden_results.txt index 8157d1a7724..4a1e9b67c4e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/vpr_reg_mcnc_equiv/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/vpr_reg_mcnc_equiv/config/golden_results.txt @@ -1,20 +1,20 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops 
critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_N10_40nm.xml alu4.pre-vpr.blif common 7.21 vpr 65.09 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 96 14 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66652 14 8 926 934 0 478 118 12 12 144 clb auto 27.6 MiB 0.39 4805 7921 1196 6264 461 65.1 MiB 0.16 0.00 4.7509 -33.2503 -4.7509 nan 0.36 0.00190138 0.00162009 0.0688365 0.0606739 74 6597 21 5.3894e+06 5.17382e+06 608941. 4228.75 4.36 0.724831 0.62564 14184 119952 -1 6717 44 4137 20437 842824 113986 4.58526 nan -32.9374 -4.58526 0 0 758555. 5267.75 0.15 0.25 0.07 -1 -1 0.15 0.111914 0.0993596 -k6_N10_40nm.xml apex2.pre-vpr.blif common 11.41 vpr 67.08 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 114 38 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68692 39 3 1113 1117 0 655 156 13 13 169 clb auto 29.4 MiB 0.52 7970 14402 2695 10284 1423 67.1 MiB 0.31 0.01 5.67046 -16.7656 -5.67046 nan 0.44 0.00272498 0.0023725 0.117567 0.10279 74 12212 25 6.52117e+06 6.14392e+06 728195. 4308.85 7.04 1.29022 1.10763 16710 144151 -1 12100 18 6056 29564 1238130 177195 5.46327 nan -16.0252 -5.46327 0 0 906856. 
5366.01 0.29 0.45 0.14 -1 -1 0.29 0.156952 0.142444 -k6_N10_40nm.xml apex4.pre-vpr.blif common 7.42 vpr 65.21 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 9 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66780 9 19 897 916 0 536 123 12 12 144 clb auto 27.4 MiB 0.43 6343 9578 1685 7369 524 65.2 MiB 0.20 0.01 4.76124 -79.5577 -4.76124 nan 0.35 0.00213426 0.00181729 0.0770497 0.0677823 62 10552 34 5.3894e+06 5.11993e+06 523024. 3632.11 4.01 0.784997 0.679634 13040 101000 -1 9514 34 6114 31519 1235789 184059 5.08979 nan -80.98 -5.08979 0 0 643745. 4470.45 0.22 0.52 0.10 -1 -1 0.22 0.192852 0.171041 -k6_N10_40nm.xml bigkey.pre-vpr.blif common 8.34 vpr 66.71 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 94 229 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68312 263 197 1372 1603 1 490 554 17 17 289 io auto 28.7 MiB 0.31 4429 175652 50348 113731 11573 66.7 MiB 0.75 0.02 2.82334 -708.457 -2.82334 2.82334 0.85 0.00300682 0.00269253 0.247317 0.22094 34 7778 18 1.21262e+07 5.06604e+06 661981. 2290.59 3.56 0.96314 0.863927 21366 128092 -1 7371 18 2453 11694 676065 127375 3.03973 3.03973 -763.206 -3.03973 0 0 811075. 
2806.49 0.32 0.31 0.12 -1 -1 0.32 0.139942 0.127415 -k6_N10_40nm.xml clma.pre-vpr.blif common 36.92 vpr 100.13 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 380 62 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 102536 383 82 3674 4077 1 2327 845 22 22 484 clb auto 53.8 MiB 1.83 30495 408131 149393 204426 54312 91.0 MiB 4.41 0.03 8.38463 -355.215 -8.38463 8.38463 1.59 0.00553376 0.00482363 1.10718 0.941379 86 48680 35 2.15576e+07 2.04797e+07 2.58188e+06 5334.46 19.54 3.95042 3.31869 52488 536144 -1 43642 27 20275 90858 4018144 545419 8.17711 8.17711 -353.933 -8.17711 0 0 3.23937e+06 6692.90 0.78 1.10 0.32 -1 -1 0.78 0.412432 0.36509 -k6_N10_40nm.xml des.pre-vpr.blif common 7.33 vpr 64.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 99 256 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66380 256 245 954 1199 0 610 600 18 18 324 io auto 26.8 MiB 0.24 4908 149811 41746 100723 7342 64.8 MiB 0.56 0.01 3.98472 -745.824 -3.98472 nan 0.95 0.002533 0.00233963 0.161563 0.149504 32 8452 46 1.37969e+07 5.33551e+06 718733. 2218.31 2.56 0.640485 0.5926 23676 138656 -1 7309 16 2635 6179 382431 81452 4.25723 nan -787.373 -4.25723 0 0 879796. 
2715.42 0.36 0.21 0.13 -1 -1 0.36 0.106126 0.0991908 -k6_N10_40nm.xml diffeq.pre-vpr.blif common 5.71 vpr 65.49 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 64 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67064 64 39 1371 1410 1 553 200 12 12 144 clb auto 27.6 MiB 0.34 3782 22392 4498 16716 1178 65.5 MiB 0.25 0.01 5.76255 -1080.02 -5.76255 5.76255 0.36 0.00236145 0.00203563 0.102983 0.0912687 46 5161 24 5.3894e+06 5.22772e+06 394751. 2741.33 1.69 0.633809 0.550745 11608 77537 -1 5089 24 3148 10672 380947 63653 5.40496 5.40496 -1070.98 -5.40496 0 0 505417. 3509.84 0.17 0.25 0.07 -1 -1 0.17 0.142657 0.127941 -k6_N10_40nm.xml dsip.pre-vpr.blif common 9.03 vpr 66.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 229 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68208 229 197 1370 1567 1 535 521 16 16 256 io auto 28.7 MiB 0.32 4249 145076 42098 96002 6976 66.6 MiB 0.69 0.01 2.82038 -687.741 -2.82038 2.82038 0.71 0.00278869 0.00251961 0.222451 0.198857 34 7820 28 1.05632e+07 5.11993e+06 580208. 2266.44 4.63 0.965895 0.865428 18880 112045 -1 7420 18 2762 9849 588259 120916 2.94626 2.94626 -745.332 -2.94626 0 0 710900. 
2776.95 0.27 0.28 0.10 -1 -1 0.27 0.137157 0.125437 -k6_N10_40nm.xml elliptic.pre-vpr.blif common 25.44 vpr 78.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 230 131 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80080 131 114 3421 3535 1 1217 475 18 18 324 clb auto 41.1 MiB 0.96 11435 128263 36626 85751 5886 78.2 MiB 1.31 0.02 7.47596 -4443.09 -7.47596 7.47596 0.95 0.00614289 0.00546322 0.48079 0.413918 50 20115 48 1.37969e+07 1.23956e+07 1.02665e+06 3168.68 12.14 2.63609 2.25374 27232 203968 -1 16697 23 7740 32519 1480622 220162 7.27428 7.27428 -4524.79 -7.27428 0 0 1.31637e+06 4062.87 0.50 0.76 0.20 -1 -1 0.50 0.391654 0.347969 -k6_N10_40nm.xml ex1010.pre-vpr.blif common 36.12 vpr 85.68 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 302 10 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 87736 10 10 2659 2669 0 1417 322 20 20 400 clb auto 45.3 MiB 1.53 26735 58781 15782 41072 1927 82.0 MiB 1.36 0.02 6.79311 -65.8142 -6.79311 nan 1.26 0.00790626 0.00655989 0.45956 0.389002 98 46081 40 1.74617e+07 1.6276e+07 2.35420e+06 5885.50 26.21 3.66603 3.08292 46488 495728 -1 40094 26 13124 79778 4200582 472080 6.72249 nan -64.7071 -6.72249 0 0 2.96690e+06 7417.26 0.70 0.97 0.30 -1 -1 0.70 0.301 0.268194 -k6_N10_40nm.xml ex5p.pre-vpr.blif common 8.16 vpr 63.91 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 78 8 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65440 8 63 761 824 0 443 149 11 11 121 clb auto 26.1 MiB 0.33 4420 9999 1667 7706 626 63.9 MiB 0.15 0.00 4.13681 -180.38 -4.13681 nan 0.29 0.00162574 0.00140448 0.0573666 0.0510513 68 
6844 29 4.36541e+06 4.20373e+06 471571. 3897.28 5.40 0.972863 0.839418 11382 90811 -1 6299 27 3950 17583 680545 104378 4.04861 nan -185.982 -4.04861 0 0 579861. 4792.24 0.19 0.31 0.09 -1 -1 0.19 0.131733 0.118193 -k6_N10_40nm.xml frisc.pre-vpr.blif common 27.72 vpr 78.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 240 20 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80412 20 116 3175 3291 1 1254 376 18 18 324 clb auto 41.2 MiB 0.96 14937 83092 21035 56910 5147 78.5 MiB 1.17 0.02 9.37137 -4836.23 -9.37137 9.37137 0.99 0.00592225 0.00528715 0.438554 0.382925 66 23239 30 1.37969e+07 1.29346e+07 1.36437e+06 4211.00 13.84 3.3299 2.8475 30784 270180 -1 20879 24 8723 36885 1817074 266114 9.12568 9.12568 -4847.74 -9.12568 0 0 1.68162e+06 5190.19 0.59 0.84 0.27 -1 -1 0.59 0.397344 0.351977 -k6_N10_40nm.xml misex3.pre-vpr.blif common 7.60 vpr 64.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 14 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66280 14 14 828 842 0 488 115 12 12 144 clb auto 26.9 MiB 0.36 4941 5695 788 4522 385 64.7 MiB 0.13 0.00 4.69826 -60.2129 -4.69826 nan 0.36 0.00170664 0.00147133 0.0572936 0.0509496 54 8009 47 5.3894e+06 4.68878e+06 451357. 3134.42 4.30 0.924117 0.797498 12324 89954 -1 7171 21 4179 17710 662276 104535 4.54449 nan -60.4911 -4.54449 0 0 586610. 
4073.68 0.19 0.29 0.09 -1 -1 0.19 0.124236 0.112554 -k6_N10_40nm.xml pdc.pre-vpr.blif common 29.89 vpr 82.92 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 307 16 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84912 16 40 2839 2879 0 1501 363 20 20 400 clb auto 46.4 MiB 1.31 24796 68945 17552 48401 2992 82.9 MiB 1.38 0.02 7.00232 -251.23 -7.00232 nan 1.21 0.00652942 0.00561975 0.453795 0.382384 80 44574 39 1.74617e+07 1.65455e+07 1.96642e+06 4916.06 16.16 3.20485 2.70123 41700 405380 -1 36503 18 12436 74071 3266555 428458 6.7432 nan -249.933 -6.7432 0 0 2.46811e+06 6170.27 0.94 1.30 0.42 -1 -1 0.94 0.437431 0.392854 -k6_N10_40nm.xml s298.pre-vpr.blif common 4.64 vpr 63.79 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 77 4 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65316 4 6 726 732 1 378 87 11 11 121 clb auto 26.1 MiB 0.30 3693 3735 430 3194 111 63.8 MiB 0.10 0.00 6.30858 -51.7143 -6.30858 6.30858 0.27 0.00174803 0.00148525 0.0451667 0.0404791 44 6200 32 4.36541e+06 4.14984e+06 309216. 2555.51 1.64 0.405195 0.352697 9582 61621 -1 5457 22 3588 18601 711043 111359 6.51519 6.51519 -54.7562 -6.51519 0 0 401578. 
3318.83 0.12 0.26 0.06 -1 -1 0.12 0.106611 0.0963338 -k6_N10_40nm.xml s38584.1.pre-vpr.blif common 31.21 vpr 87.25 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 368 38 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89348 39 304 4677 4982 1 2229 711 22 22 484 clb auto 50.4 MiB 1.17 15892 248127 72238 162853 13036 87.3 MiB 2.40 0.03 5.02641 -3215.78 -5.02641 5.02641 1.57 0.00844683 0.00754098 0.749422 0.648136 44 24233 33 2.15576e+07 1.9833e+07 1.41060e+06 2914.46 7.76 3.1785 2.75583 39444 288878 -1 21443 23 12112 33710 1370500 248738 5.15976 5.15976 -3289.71 -5.15976 0 0 1.82601e+06 3772.76 0.73 0.91 0.28 -1 -1 0.73 0.543496 0.483827 -k6_N10_40nm.xml seq.pre-vpr.blif common 7.61 vpr 65.78 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 103 41 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67356 41 35 1006 1041 0 588 179 13 13 169 clb auto 28.2 MiB 0.47 6846 17819 3270 12655 1894 65.8 MiB 0.27 0.01 4.96104 -139.99 -4.96104 nan 0.43 0.00209503 0.00179479 0.0985096 0.0867435 56 12202 44 6.52117e+06 5.55108e+06 559864. 3312.80 3.70 0.750444 0.65127 14694 110679 -1 10652 23 5478 25529 1041333 159644 4.77402 nan -140.731 -4.77402 0 0 714795. 
4229.55 0.24 0.41 0.11 -1 -1 0.24 0.151125 0.136708 -k6_N10_40nm.xml spla.pre-vpr.blif common 29.32 vpr 77.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 245 16 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 79508 16 46 2232 2278 0 1154 307 18 18 324 clb auto 40.3 MiB 1.01 17066 52057 12539 36561 2957 77.6 MiB 0.98 0.02 6.1923 -216.925 -6.1923 nan 0.96 0.00511679 0.00441538 0.341779 0.291619 70 29237 30 1.37969e+07 1.3204e+07 1.42834e+06 4408.47 18.56 3.25549 2.73837 31752 286880 -1 25273 21 9773 55725 2341275 316396 6.1326 nan -216.962 -6.1326 0 0 1.78317e+06 5503.60 0.64 1.01 0.30 -1 -1 0.64 0.364275 0.325415 -k6_N10_40nm.xml tseng.pre-vpr.blif common 6.63 vpr 66.31 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 110 52 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:20 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67900 52 122 1461 1583 1 509 284 13 13 169 clb auto 28.5 MiB 0.29 3129 40403 8633 29315 2455 66.3 MiB 0.30 0.01 5.00636 -1311.79 -5.00636 5.00636 0.45 0.00228655 0.00204201 0.110733 0.0982164 32 5107 32 6.52117e+06 5.92834e+06 352895. 2088.14 2.83 0.847248 0.735343 12174 67024 -1 4717 29 2763 8262 352580 70758 4.58906 4.58906 -1300.62 -4.58906 0 0 431135. 
2551.09 0.16 0.23 0.06 -1 -1 0.16 0.140174 0.124654 +k6_N10_40nm.xml alu4.pre-vpr.blif common 8.37 vpr 65.41 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 14 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66984 14 8 926 934 0 517 119 12 12 144 clb auto 27.7 MiB 0.40 4978 9171 1462 7157 552 65.4 MiB 0.20 0.01 4.85854 -36.6925 -4.85854 nan 0.37 0.0021973 0.001926 0.087389 0.0762459 52 6841 26 1.8e+06 1.746e+06 452692. 3143.70 5.01 1.17991 1.00821 12180 91053 -1 6642 23 3992 14975 576861 90555 4.88853 nan -35.9466 -4.88853 0 0 594734. 4130.10 0.19 0.29 0.09 -1 -1 0.19 0.144306 0.130414 +k6_N10_40nm.xml apex2.pre-vpr.blif common 11.54 vpr 67.02 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 115 38 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68632 39 3 1113 1117 0 675 157 13 13 169 clb auto 29.5 MiB 0.52 7879 13693 2337 10136 1220 67.0 MiB 0.28 0.01 6.02154 -17.5859 -6.02154 nan 0.45 0.00275834 0.00233622 0.107502 0.0951802 62 13086 48 2.178e+06 2.07e+06 652532. 3861.14 7.21 1.37926 1.19039 15366 127615 -1 11563 30 6785 30387 1264252 174643 5.82632 nan -17.1242 -5.82632 0 0 801739. 4744.02 0.26 0.57 0.12 -1 -1 0.26 0.221433 0.19916 +k6_N10_40nm.xml apex4.pre-vpr.blif common 7.06 vpr 65.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 9 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67016 9 19 897 916 0 569 123 12 12 144 clb auto 27.7 MiB 0.44 6709 9578 1569 7337 672 65.4 MiB 0.22 0.01 5.16784 -84.6504 -5.16784 nan 0.36 0.00241558 0.0020605 0.0899188 0.0795928 62 10796 38 1.8e+06 1.71e+06 546237. 
3793.31 3.62 0.755057 0.654516 13040 106280 -1 9528 24 5403 25371 1032193 149160 5.31039 nan -84.6173 -5.31039 0 0 671089. 4660.34 0.22 0.44 0.10 -1 -1 0.22 0.163682 0.147834 +k6_N10_40nm.xml bigkey.pre-vpr.blif common 9.29 vpr 66.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 94 229 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68204 263 197 1372 1603 1 490 554 17 17 289 io auto 28.6 MiB 0.32 4312 159734 46622 103443 9669 66.6 MiB 0.69 0.01 3.07033 -729.814 -3.07033 3.07033 0.90 0.00300216 0.00272837 0.221359 0.199167 34 7551 23 4.05e+06 1.692e+06 688919. 2383.80 4.43 0.958995 0.86192 21366 134962 -1 7110 17 2315 10686 592079 112425 3.1266 3.1266 -779.621 -3.1266 0 0 845950. 2927.16 0.34 0.29 0.12 -1 -1 0.34 0.139488 0.128092 +k6_N10_40nm.xml clma.pre-vpr.blif common 28.50 vpr 90.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 378 62 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 92944 383 82 3674 4077 1 2351 843 22 22 484 clb auto 53.7 MiB 1.74 30084 406803 150647 202517 53639 90.8 MiB 3.85 0.03 8.58818 -372.88 -8.58818 8.58818 1.68 0.00744604 0.00626682 0.959624 0.807498 80 49364 38 7.2e+06 6.804e+06 2.49993e+06 5165.15 11.74 3.19017 2.67278 50556 522948 -1 43579 26 21059 94912 4449370 570004 8.46141 8.46141 -379.797 -8.46141 0 0 3.14482e+06 6497.55 0.83 1.25 0.31 -1 -1 0.83 0.441494 0.389906 +k6_N10_40nm.xml des.pre-vpr.blif common 9.79 vpr 64.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 101 256 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66208 256 245 954 1199 0 613 602 18 18 324 io auto 26.5 MiB 0.23 4828 
140357 37125 96029 7203 64.7 MiB 0.54 0.01 4.31026 -789.244 -4.31026 nan 1.00 0.00288301 0.00272449 0.170722 0.158812 34 7705 26 4.608e+06 1.818e+06 779010. 2404.35 4.97 1.01926 0.945251 24000 152888 -1 7130 18 2525 6134 402061 81348 4.49788 nan -806.729 -4.49788 0 0 956463. 2952.05 0.37 0.23 0.13 -1 -1 0.37 0.120003 0.112271 +k6_N10_40nm.xml diffeq.pre-vpr.blif common 7.31 vpr 65.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 64 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67516 64 39 1371 1410 1 554 198 12 12 144 clb auto 27.9 MiB 0.33 3809 23814 5171 17235 1408 65.9 MiB 0.27 0.01 6.20988 -1165.59 -6.20988 6.20988 0.38 0.00227363 0.00200923 0.114435 0.10123 34 5732 42 1.8e+06 1.71e+06 320785. 2227.68 3.18 0.992959 0.860999 10464 62065 -1 5209 22 3174 9420 385735 66218 6.26652 6.26652 -1186.82 -6.26652 0 0 394711. 2741.05 0.14 0.25 0.06 -1 -1 0.14 0.145037 0.130267 +k6_N10_40nm.xml dsip.pre-vpr.blif common 9.22 vpr 66.58 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 229 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68180 229 197 1370 1567 1 538 523 16 16 256 io auto 28.6 MiB 0.34 4639 156367 46237 102482 7648 66.6 MiB 0.78 0.01 3.20322 -726.151 -3.20322 3.20322 0.78 0.00314855 0.00281952 0.245995 0.219734 34 8368 47 3.528e+06 1.746e+06 604079. 2359.69 4.62 1.11544 1.00073 18880 118149 -1 7459 13 2689 9149 571229 113616 3.28489 3.28489 -779.779 -3.28489 0 0 742044. 
2898.61 0.29 0.26 0.11 -1 -1 0.29 0.115099 0.106741 +k6_N10_40nm.xml elliptic.pre-vpr.blif common 23.94 vpr 78.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 235 131 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80328 131 114 3421 3535 1 1210 480 18 18 324 clb auto 41.3 MiB 1.02 11455 124428 34728 84130 5570 78.4 MiB 1.32 0.02 8.08387 -4695.24 -8.08387 8.08387 1.04 0.00665594 0.00564279 0.475209 0.404761 50 19350 40 4.608e+06 4.23e+06 1.06618e+06 3290.67 8.07 2.25418 1.93137 27232 214208 -1 16443 24 8056 35475 1612694 233265 7.93357 7.93357 -4714.53 -7.93357 0 0 1.36711e+06 4219.48 0.49 0.83 0.20 -1 -1 0.49 0.418291 0.370112 +k6_N10_40nm.xml ex1010.pre-vpr.blif common 38.22 vpr 82.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 299 10 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84168 10 10 2659 2669 0 1414 319 20 20 400 clb auto 45.7 MiB 1.52 26670 58036 15610 40440 1986 82.2 MiB 1.37 0.02 6.99148 -66.5608 -6.99148 nan 1.34 0.00734998 0.00639376 0.47292 0.399553 90 47190 31 5.832e+06 5.382e+06 2.27845e+06 5696.13 28.00 3.11918 2.60796 44092 472493 -1 40435 27 13453 81187 4495090 507770 6.68929 nan -64.8044 -6.68929 0 0 2.84047e+06 7101.17 0.71 1.14 0.29 -1 -1 0.71 0.336474 0.299193 +k6_N10_40nm.xml ex5p.pre-vpr.blif common 8.49 vpr 63.87 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 8 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65400 8 63 761 824 0 451 153 12 12 144 clb auto 26.1 MiB 0.31 4564 13650 2467 10070 1113 63.9 MiB 0.19 0.01 4.36001 -198.144 -4.36001 nan 0.36 0.00202315 0.00175635 0.0754293 0.0662608 46 
8116 47 1.8e+06 1.476e+06 409728. 2845.33 5.59 1.04053 0.898087 11608 81817 -1 7004 30 4945 21003 896494 141905 4.47246 nan -203.996 -4.47246 0 0 527971. 3666.47 0.17 0.37 0.07 -1 -1 0.17 0.142673 0.127129 +k6_N10_40nm.xml frisc.pre-vpr.blif common 27.33 vpr 78.79 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 242 20 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80676 20 116 3175 3291 1 1258 378 18 18 324 clb auto 41.4 MiB 0.98 15098 76874 18624 53343 4907 78.8 MiB 1.11 0.02 9.88726 -5092.35 -9.88726 9.88726 1.01 0.00597592 0.00527929 0.409808 0.353672 60 25951 46 4.608e+06 4.356e+06 1.28013e+06 3951.02 13.10 2.54594 2.18414 29492 257832 -1 21144 26 8391 35581 1929606 263963 9.98478 9.98478 -5145.99 -9.98478 0 0 1.60155e+06 4943.04 0.56 0.88 0.25 -1 -1 0.56 0.42371 0.374449 +k6_N10_40nm.xml misex3.pre-vpr.blif common 7.52 vpr 64.44 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 14 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65988 14 14 828 842 0 506 114 12 12 144 clb auto 26.8 MiB 0.36 4946 8118 1259 6320 539 64.4 MiB 0.18 0.00 4.99427 -64.5608 -4.99427 nan 0.38 0.00202505 0.0017544 0.076652 0.0683471 50 7449 35 1.8e+06 1.548e+06 439064. 3049.06 4.16 0.859461 0.745897 11896 86528 -1 6915 25 4736 20579 777903 117189 4.85973 nan -63.5848 -4.85973 0 0 562980. 
3909.58 0.18 0.32 0.08 -1 -1 0.18 0.128932 0.1166 +k6_N10_40nm.xml pdc.pre-vpr.blif common 31.39 vpr 83.04 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 311 16 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 85032 16 40 2839 2879 0 1521 367 20 20 400 clb auto 46.5 MiB 1.32 24941 68643 17504 47903 3236 83.0 MiB 1.50 0.03 7.11101 -257.274 -7.11101 nan 1.32 0.00814889 0.00674352 0.494696 0.420888 78 41725 45 5.832e+06 5.598e+06 2.00674e+06 5016.85 17.98 3.39922 2.87425 41300 418538 -1 35794 21 11768 65698 3038822 387442 7.18904 nan -255.486 -7.18904 0 0 2.53133e+06 6328.34 0.99 1.44 0.44 -1 -1 0.99 0.539876 0.484595 +k6_N10_40nm.xml s298.pre-vpr.blif common 5.07 vpr 63.72 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 77 4 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65248 4 6 726 732 1 405 87 11 11 121 clb auto 26.1 MiB 0.31 3932 4503 604 3722 177 63.7 MiB 0.12 0.00 7.20975 -57.1746 -7.20975 7.20975 0.29 0.00160903 0.00138311 0.0565681 0.0504102 44 6222 32 1.458e+06 1.386e+06 324964. 2685.65 2.03 0.544024 0.472793 9582 65203 -1 5554 23 3368 16222 637492 96172 6.94914 6.94914 -58.3727 -6.94914 0 0 420935. 
3478.80 0.13 0.28 0.05 -1 -1 0.13 0.123093 0.110761 +k6_N10_40nm.xml s38584.1.pre-vpr.blif common 37.83 vpr 87.75 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 376 38 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89856 39 304 4677 4982 1 2202 719 22 22 484 clb auto 50.4 MiB 1.17 15134 255020 73345 166893 14782 87.8 MiB 2.42 0.03 5.66276 -3253.04 -5.66276 5.66276 1.70 0.00802743 0.00713342 0.736128 0.638028 40 23370 39 7.2e+06 6.768e+06 1.34575e+06 2780.48 12.71 4.12792 3.53956 37996 272632 -1 21030 29 11717 32882 1453579 252466 5.64983 5.64983 -3333.79 -5.64983 0 0 1.68761e+06 3486.79 0.69 1.00 0.26 -1 -1 0.69 0.594158 0.523275 +k6_N10_40nm.xml seq.pre-vpr.blif common 10.51 vpr 66.10 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 101 41 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67684 41 35 1006 1041 0 615 177 13 13 169 clb auto 28.5 MiB 0.48 6961 16049 2946 11365 1738 66.1 MiB 0.28 0.01 5.03195 -143.664 -5.03195 nan 0.44 0.0026392 0.0023032 0.104441 0.0926794 60 12020 38 2.178e+06 1.818e+06 630658. 3731.70 6.55 1.20996 1.04611 15198 124941 -1 10354 24 5139 23293 962130 137023 5.06992 nan -144.115 -5.06992 0 0 788291. 
4664.44 0.26 0.42 0.12 -1 -1 0.26 0.167051 0.150709 +k6_N10_40nm.xml spla.pre-vpr.blif common 20.31 vpr 77.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 241 16 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 79800 16 46 2232 2278 0 1201 303 18 18 324 clb auto 40.5 MiB 1.02 17032 51153 12014 36600 2539 77.9 MiB 1.10 0.02 6.29481 -217.773 -6.29481 nan 1.01 0.00692542 0.00584914 0.405951 0.347549 70 30122 46 4.608e+06 4.338e+06 1.48298e+06 4577.10 9.27 2.30812 1.96597 31752 300704 -1 25154 21 9819 54210 2459519 309528 6.20862 nan -218.942 -6.20862 0 0 1.85205e+06 5716.21 0.68 1.04 0.30 -1 -1 0.68 0.377258 0.335776 +k6_N10_40nm.xml tseng.pre-vpr.blif common 6.73 vpr 66.39 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 105 52 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:57 gh-actions-runner-vtr-auto-spawned31 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67984 52 122 1461 1583 1 525 279 13 13 169 clb auto 28.5 MiB 0.30 3192 45829 11147 32251 2431 66.4 MiB 0.38 0.01 5.68935 -1256.78 -5.68935 5.68935 0.45 0.00301241 0.00265536 0.153193 0.135734 30 5287 29 2.178e+06 1.89e+06 350324. 2072.92 2.78 0.871199 0.757648 12006 67531 -1 4334 25 2585 6950 258089 51105 5.34065 5.34065 -1250.43 -5.34065 0 0 430798. 
2549.10 0.16 0.21 0.06 -1 -1 0.16 0.136378 0.121702 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/vpr_reg_mcnc_equiv/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/vpr_reg_mcnc_equiv/config/golden_results.txt index 7d481319553..d627c7d0b85 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/vpr_reg_mcnc_equiv/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/vpr_reg_mcnc_equiv/config/golden_results.txt @@ -1,20 +1,20 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed 
crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_N10_40nm.xml alu4.pre-vpr.blif common 9.29 vpr 65.08 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 96 14 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66644 14 8 926 934 0 478 118 12 12 144 clb auto 27.6 MiB 0.41 4805 7921 1196 6264 461 65.1 MiB 0.18 0.01 4.7509 -33.2503 -4.7509 nan 0.36 0.00245534 0.00218398 0.0861248 0.0762105 74 6597 21 5.3894e+06 5.17382e+06 608941. 4228.75 6.03 1.1052 0.963163 14184 119952 -1 6717 44 4137 20437 842824 113986 4.58526 nan -32.9374 -4.58526 0 0 758555. 5267.75 0.22 0.28 0.12 -1 -1 0.22 0.124145 0.110592 -k6_N10_40nm.xml apex2.pre-vpr.blif common 8.94 vpr 67.36 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 114 38 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68972 39 3 1113 1117 0 655 156 13 13 169 clb auto 29.7 MiB 0.53 7970 14402 2695 10284 1423 67.4 MiB 0.30 0.01 5.67046 -16.7656 -5.67046 nan 0.44 0.00249151 0.00212297 0.115197 0.101904 74 12212 25 6.52117e+06 6.14392e+06 728195. 4308.85 5.41 0.941858 0.815146 16710 144151 -1 12100 18 6056 29564 1238130 177195 5.46327 nan -16.0252 -5.46327 0 0 906856. 
5366.01 0.19 0.29 0.08 -1 -1 0.19 0.0956231 0.0877335 -k6_N10_40nm.xml apex4.pre-vpr.blif common 7.93 vpr 65.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 9 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67096 9 19 897 916 0 536 123 12 12 144 clb auto 27.8 MiB 0.45 6343 9578 1685 7369 524 65.5 MiB 0.23 0.01 4.76124 -79.5577 -4.76124 nan 0.36 0.00226599 0.00196452 0.0943993 0.0837185 62 10552 34 5.3894e+06 5.11993e+06 523024. 3632.11 4.35 0.89265 0.774996 13040 101000 -1 9514 34 6114 31519 1235789 184059 5.08979 nan -80.98 -5.08979 0 0 643745. 4470.45 0.21 0.57 0.10 -1 -1 0.21 0.212584 0.188789 -k6_N10_40nm.xml bigkey.pre-vpr.blif common 8.57 vpr 66.60 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 94 229 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68196 263 197 1372 1603 1 490 554 17 17 289 io auto 28.6 MiB 0.32 4429 175652 50348 113731 11573 66.6 MiB 0.78 0.02 2.82334 -708.457 -2.82334 2.82334 0.89 0.00309848 0.00277799 0.251899 0.224174 34 7778 18 1.21262e+07 5.06604e+06 661981. 2290.59 3.59 0.946112 0.847675 21366 128092 -1 7371 18 2453 11694 676065 127375 3.03973 3.03973 -763.206 -3.03973 0 0 811075. 
2806.49 0.31 0.31 0.12 -1 -1 0.31 0.133578 0.12208 -k6_N10_40nm.xml clma.pre-vpr.blif common 42.14 vpr 100.22 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 380 62 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 102628 383 82 3674 4077 1 2327 845 22 22 484 clb auto 53.9 MiB 1.73 30495 408131 149393 204426 54312 91.1 MiB 4.72 0.05 8.38463 -355.215 -8.38463 8.38463 1.71 0.0104313 0.00935194 1.25162 1.04414 86 48680 35 2.15576e+07 2.04797e+07 2.58188e+06 5334.46 24.07 5.1352 4.28764 52488 536144 -1 43642 27 20275 90858 4018144 545419 8.17711 8.17711 -353.933 -8.17711 0 0 3.23937e+06 6692.90 0.85 1.18 0.33 -1 -1 0.85 0.436135 0.383384 -k6_N10_40nm.xml des.pre-vpr.blif common 7.63 vpr 64.74 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 99 256 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66292 256 245 954 1199 0 610 600 18 18 324 io auto 26.8 MiB 0.25 4908 149811 41746 100723 7342 64.7 MiB 0.59 0.01 3.98472 -745.824 -3.98472 nan 1.02 0.00279852 0.00260384 0.181571 0.168921 32 8452 46 1.37969e+07 5.33551e+06 718733. 2218.31 2.72 0.697381 0.647433 23676 138656 -1 7309 16 2635 6179 382431 81452 4.25723 nan -787.373 -4.25723 0 0 879796. 
2715.42 0.35 0.22 0.12 -1 -1 0.35 0.11147 0.104644 -k6_N10_40nm.xml diffeq.pre-vpr.blif common 5.96 vpr 65.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 64 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67564 64 39 1371 1410 1 553 200 12 12 144 clb auto 28.1 MiB 0.33 3782 22392 4498 16716 1178 66.0 MiB 0.28 0.01 5.76255 -1080.02 -5.76255 5.76255 0.39 0.00268109 0.00242071 0.122136 0.108269 46 5161 24 5.3894e+06 5.22772e+06 394751. 2741.33 1.80 0.696206 0.60919 11608 77537 -1 5089 24 3148 10672 380947 63653 5.40496 5.40496 -1070.98 -5.40496 0 0 505417. 3509.84 0.17 0.25 0.08 -1 -1 0.17 0.143015 0.128603 -k6_N10_40nm.xml dsip.pre-vpr.blif common 9.46 vpr 66.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 229 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68068 229 197 1370 1567 1 535 521 16 16 256 io auto 28.5 MiB 0.33 4249 145076 42098 96002 6976 66.5 MiB 0.71 0.01 2.82038 -687.741 -2.82038 2.82038 0.74 0.00305502 0.00278554 0.230386 0.204932 34 7820 28 1.05632e+07 5.11993e+06 580208. 2266.44 4.88 1.03291 0.925929 18880 112045 -1 7420 18 2762 9849 588259 120916 2.94626 2.94626 -745.332 -2.94626 0 0 710900. 
2776.95 0.30 0.34 0.10 -1 -1 0.30 0.158768 0.144045 -k6_N10_40nm.xml elliptic.pre-vpr.blif common 26.78 vpr 78.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 230 131 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80328 131 114 3421 3535 1 1217 475 18 18 324 clb auto 41.3 MiB 0.99 11435 128263 36626 85751 5886 78.4 MiB 1.41 0.02 7.47596 -4443.09 -7.47596 7.47596 1.05 0.00689155 0.00617445 0.553306 0.477884 50 20115 48 1.37969e+07 1.23956e+07 1.02665e+06 3168.68 13.47 3.20144 2.75548 27232 203968 -1 16697 23 7740 32519 1480622 220162 7.27428 7.27428 -4524.79 -7.27428 0 0 1.31637e+06 4062.87 0.50 0.75 0.18 -1 -1 0.50 0.386104 0.344498 -k6_N10_40nm.xml ex1010.pre-vpr.blif common 46.88 vpr 85.48 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 302 10 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 87536 10 10 2659 2669 0 1417 322 20 20 400 clb auto 45.6 MiB 1.55 26735 58781 15782 41072 1927 82.1 MiB 1.51 0.03 6.79311 -65.8142 -6.79311 nan 1.35 0.0085124 0.00699839 0.554051 0.464812 98 46081 40 1.74617e+07 1.6276e+07 2.35420e+06 5885.50 33.93 5.65722 4.78731 46488 495728 -1 40094 26 13124 79778 4200582 472080 6.72249 nan -64.7071 -6.72249 0 0 2.96690e+06 7417.26 1.14 1.68 0.56 -1 -1 1.14 0.520613 0.459683 -k6_N10_40nm.xml ex5p.pre-vpr.blif common 8.38 vpr 63.84 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 78 8 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65376 8 63 761 824 0 443 149 11 11 121 clb auto 26.0 MiB 0.34 4420 9999 1667 7706 626 63.8 MiB 0.17 0.01 4.13681 -180.38 -4.13681 nan 0.29 0.00211285 0.00182583 0.0720785 0.0644836 
68 6844 29 4.36541e+06 4.20373e+06 471571. 3897.28 5.57 1.06761 0.928764 11382 90811 -1 6299 27 3950 17583 680545 104378 4.04861 nan -185.982 -4.04861 0 0 579861. 4792.24 0.19 0.34 0.09 -1 -1 0.19 0.141177 0.126817 -k6_N10_40nm.xml frisc.pre-vpr.blif common 30.81 vpr 78.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 240 20 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80548 20 116 3175 3291 1 1254 376 18 18 324 clb auto 41.3 MiB 1.01 14937 83092 21035 56910 5147 78.7 MiB 1.24 0.02 9.37137 -4836.23 -9.37137 9.37137 1.06 0.00697286 0.00631498 0.487542 0.424923 66 23239 30 1.37969e+07 1.29346e+07 1.36437e+06 4211.00 15.48 4.05596 3.47527 30784 270180 -1 20879 24 8723 36885 1817074 266114 9.12568 9.12568 -4847.74 -9.12568 0 0 1.68162e+06 5190.19 0.68 0.96 0.27 -1 -1 0.68 0.464555 0.409006 -k6_N10_40nm.xml misex3.pre-vpr.blif common 8.14 vpr 64.57 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 14 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66124 14 14 828 842 0 488 115 12 12 144 clb auto 26.8 MiB 0.37 4941 5695 788 4522 385 64.6 MiB 0.13 0.01 4.69826 -60.2129 -4.69826 nan 0.36 0.00194998 0.00170438 0.0577851 0.0517467 54 8009 47 5.3894e+06 4.68878e+06 451357. 3134.42 4.78 1.0707 0.928432 12324 89954 -1 7171 21 4179 17710 662276 104535 4.54449 nan -60.4911 -4.54449 0 0 586610. 
4073.68 0.20 0.33 0.09 -1 -1 0.20 0.137883 0.125264 -k6_N10_40nm.xml pdc.pre-vpr.blif common 32.65 vpr 82.86 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 307 16 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84852 16 40 2839 2879 0 1501 363 20 20 400 clb auto 46.3 MiB 1.35 24796 68945 17552 48401 2992 82.9 MiB 1.52 0.02 7.00232 -251.23 -7.00232 nan 1.36 0.00825615 0.00726621 0.537715 0.450301 80 44574 39 1.74617e+07 1.65455e+07 1.96642e+06 4916.06 17.86 3.70284 3.07411 41700 405380 -1 36503 18 12436 74071 3266555 428458 6.7432 nan -249.933 -6.7432 0 0 2.46811e+06 6170.27 1.06 1.45 0.44 -1 -1 1.06 0.492521 0.439734 -k6_N10_40nm.xml s298.pre-vpr.blif common 5.16 vpr 63.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 77 4 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64812 4 6 726 732 1 378 87 11 11 121 clb auto 25.6 MiB 0.33 3693 3735 430 3194 111 63.3 MiB 0.12 0.00 6.30858 -51.7143 -6.30858 6.30858 0.29 0.00206524 0.0018247 0.0575937 0.0523635 44 6200 32 4.36541e+06 4.14984e+06 309216. 2555.51 1.92 0.492503 0.432481 9582 61621 -1 5457 22 3588 18601 711043 111359 6.51519 6.51519 -54.7562 -6.51519 0 0 401578. 
3318.83 0.13 0.30 0.06 -1 -1 0.13 0.11595 0.105363 -k6_N10_40nm.xml s38584.1.pre-vpr.blif common 37.82 vpr 87.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 368 38 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89548 39 304 4677 4982 1 2229 711 22 22 484 clb auto 50.6 MiB 1.27 15892 248127 72238 162853 13036 87.4 MiB 2.76 0.04 5.02641 -3215.78 -5.02641 5.02641 1.86 0.00946056 0.0084162 0.850041 0.726935 44 24233 33 2.15576e+07 1.9833e+07 1.41060e+06 2914.46 9.06 3.67401 3.14581 39444 288878 -1 21443 23 12112 33710 1370500 248738 5.15976 5.15976 -3289.71 -5.15976 0 0 1.82601e+06 3772.76 0.88 1.06 0.28 -1 -1 0.88 0.611244 0.534232 -k6_N10_40nm.xml seq.pre-vpr.blif common 8.06 vpr 65.75 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 103 41 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67332 41 35 1006 1041 0 588 179 13 13 169 clb auto 28.2 MiB 0.48 6846 17819 3270 12655 1894 65.8 MiB 0.31 0.01 4.96104 -139.99 -4.96104 nan 0.44 0.00266866 0.00225678 0.118497 0.104761 56 12202 44 6.52117e+06 5.55108e+06 559864. 3312.80 4.05 0.848326 0.735081 14694 110679 -1 10652 23 5478 25529 1041333 159644 4.77402 nan -140.731 -4.77402 0 0 714795. 
4229.55 0.23 0.47 0.11 -1 -1 0.23 0.169854 0.153167 -k6_N10_40nm.xml spla.pre-vpr.blif common 32.64 vpr 77.38 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 245 16 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 79240 16 46 2232 2278 0 1154 307 18 18 324 clb auto 40.0 MiB 1.04 17066 52057 12539 36561 2957 77.4 MiB 1.10 0.02 6.1923 -216.925 -6.1923 nan 1.04 0.00646966 0.00573086 0.413673 0.351269 70 29237 30 1.37969e+07 1.3204e+07 1.42834e+06 4408.47 21.13 4.16608 3.52139 31752 286880 -1 25273 21 9773 55725 2341275 316396 6.1326 nan -216.962 -6.1326 0 0 1.78317e+06 5503.60 0.72 1.16 0.29 -1 -1 0.72 0.428936 0.383431 -k6_N10_40nm.xml tseng.pre-vpr.blif common 7.00 vpr 65.94 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 110 52 -1 -1 success 8f82416-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-02T00:48:13 gh-actions-runner-vtr-auto-spawned84 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67524 52 122 1461 1583 1 509 284 13 13 169 clb auto 28.1 MiB 0.30 3129 40403 8633 29315 2455 65.9 MiB 0.32 0.01 5.00636 -1311.79 -5.00636 5.00636 0.44 0.00242755 0.00214846 0.127974 0.114495 32 5107 32 6.52117e+06 5.92834e+06 352895. 2088.14 3.01 0.951342 0.834771 12174 67024 -1 4717 29 2763 8262 352580 70758 4.58906 4.58906 -1300.62 -4.58906 0 0 431135. 
2551.09 0.16 0.27 0.06 -1 -1 0.16 0.167609 0.149371 +k6_N10_40nm.xml alu4.pre-vpr.blif common 8.71 vpr 65.38 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 14 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66944 14 8 926 934 0 517 119 12 12 144 clb auto 27.6 MiB 0.40 4978 9171 1462 7157 552 65.4 MiB 0.21 0.01 4.85854 -36.6925 -4.85854 nan 0.38 0.00226244 0.001991 0.0945504 0.082642 52 6841 26 1.8e+06 1.746e+06 452692. 3143.70 5.22 1.22756 1.05858 12180 91053 -1 6642 23 3992 14975 576861 90555 4.88853 nan -35.9466 -4.88853 0 0 594734. 4130.10 0.19 0.31 0.08 -1 -1 0.19 0.150949 0.136693 +k6_N10_40nm.xml apex2.pre-vpr.blif common 9.58 vpr 67.03 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 115 38 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68636 39 3 1113 1117 0 675 157 13 13 169 clb auto 29.5 MiB 0.53 7879 13693 2337 10136 1220 67.0 MiB 0.30 0.01 6.02154 -17.5859 -6.02154 nan 0.45 0.0028102 0.00246118 0.117499 0.103818 62 13086 48 2.178e+06 2.07e+06 652532. 3861.14 6.02 1.0391 0.894937 15366 127615 -1 11563 30 6785 30387 1264252 174643 5.82632 nan -17.1242 -5.82632 0 0 801739. 4744.02 0.17 0.37 0.07 -1 -1 0.17 0.134439 0.121141 +k6_N10_40nm.xml apex4.pre-vpr.blif common 7.04 vpr 65.39 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 9 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66960 9 19 897 916 0 569 123 12 12 144 clb auto 27.6 MiB 0.47 6709 9578 1569 7337 672 65.4 MiB 0.22 0.01 5.16784 -84.6504 -5.16784 nan 0.37 0.00236091 0.00202291 0.0896899 0.0795875 62 10796 38 1.8e+06 1.71e+06 546237. 
3793.31 3.57 0.718352 0.625054 13040 106280 -1 9528 24 5403 25371 1032193 149160 5.31039 nan -84.6173 -5.31039 0 0 671089. 4660.34 0.21 0.41 0.10 -1 -1 0.21 0.148545 0.133948 +k6_N10_40nm.xml bigkey.pre-vpr.blif common 9.29 vpr 66.86 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 94 229 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68464 263 197 1372 1603 1 490 554 17 17 289 io auto 29.1 MiB 0.32 4312 159734 46622 103443 9669 66.9 MiB 0.69 0.01 3.07033 -729.814 -3.07033 3.07033 0.91 0.00284645 0.00259273 0.224153 0.200395 34 7551 23 4.05e+06 1.692e+06 688919. 2383.80 4.39 0.960078 0.860808 21366 134962 -1 7110 17 2315 10686 592079 112425 3.1266 3.1266 -779.621 -3.1266 0 0 845950. 2927.16 0.34 0.29 0.12 -1 -1 0.34 0.137212 0.125624 +k6_N10_40nm.xml clma.pre-vpr.blif common 35.55 vpr 90.90 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 378 62 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 93080 383 82 3674 4077 1 2351 843 22 22 484 clb auto 53.7 MiB 1.73 30084 406803 150647 202517 53639 90.9 MiB 4.76 0.05 8.58818 -372.88 -8.58818 8.58818 1.73 0.0116669 0.00952614 1.2421 1.0465 80 49364 38 7.2e+06 6.804e+06 2.49993e+06 5165.15 17.71 4.82435 4.06394 50556 522948 -1 43579 26 21059 94912 4449370 570004 8.46141 8.46141 -379.797 -8.46141 0 0 3.14482e+06 6497.55 0.85 1.29 0.30 -1 -1 0.85 0.454226 0.400725 +k6_N10_40nm.xml des.pre-vpr.blif common 10.21 vpr 64.85 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 101 256 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66408 256 245 954 1199 0 613 602 18 18 324 io auto 26.6 MiB 0.24 4828 
140357 37125 96029 7203 64.9 MiB 0.56 0.01 4.31026 -789.244 -4.31026 nan 1.06 0.00295092 0.00275592 0.172656 0.160844 34 7705 26 4.608e+06 1.818e+06 779010. 2404.35 5.17 1.062 0.987559 24000 152888 -1 7130 18 2525 6134 402061 81348 4.49788 nan -806.729 -4.49788 0 0 956463. 2952.05 0.38 0.23 0.13 -1 -1 0.38 0.123016 0.115293 +k6_N10_40nm.xml diffeq.pre-vpr.blif common 7.40 vpr 65.88 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 95 64 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67460 64 39 1371 1410 1 554 198 12 12 144 clb auto 28.0 MiB 0.34 3809 23814 5171 17235 1408 65.9 MiB 0.27 0.01 6.20988 -1165.59 -6.20988 6.20988 0.37 0.0024161 0.00215718 0.116118 0.102969 34 5732 42 1.8e+06 1.71e+06 320785. 2227.68 3.33 1.059 0.922835 10464 62065 -1 5209 22 3174 9420 385735 66218 6.26652 6.26652 -1186.82 -6.26652 0 0 394711. 2741.05 0.14 0.25 0.05 -1 -1 0.14 0.144545 0.129925 +k6_N10_40nm.xml dsip.pre-vpr.blif common 9.29 vpr 66.72 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 97 229 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68324 229 197 1370 1567 1 538 523 16 16 256 io auto 28.7 MiB 0.35 4639 156367 46237 102482 7648 66.7 MiB 0.78 0.01 3.20322 -726.151 -3.20322 3.20322 0.76 0.00358135 0.00313658 0.24981 0.224605 34 8368 47 3.528e+06 1.746e+06 604079. 2359.69 4.65 1.11832 1.00687 18880 118149 -1 7459 13 2689 9149 571229 113616 3.28489 3.28489 -779.779 -3.28489 0 0 742044. 
2898.61 0.29 0.26 0.11 -1 -1 0.29 0.118119 0.1095 +k6_N10_40nm.xml elliptic.pre-vpr.blif common 25.49 vpr 78.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 235 131 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80400 131 114 3421 3535 1 1210 480 18 18 324 clb auto 41.4 MiB 0.98 11455 124428 34728 84130 5570 78.5 MiB 1.30 0.02 8.08387 -4695.24 -8.08387 8.08387 1.02 0.00650682 0.00542484 0.467206 0.395934 50 19350 40 4.608e+06 4.23e+06 1.06618e+06 3290.67 8.28 2.31175 1.98073 27232 214208 -1 16443 24 8056 35475 1612694 233265 7.93357 7.93357 -4714.53 -7.93357 0 0 1.36711e+06 4219.48 0.54 0.89 0.20 -1 -1 0.54 0.454355 0.401092 +k6_N10_40nm.xml ex1010.pre-vpr.blif common 49.10 vpr 81.94 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 299 10 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 83904 10 10 2659 2669 0 1414 319 20 20 400 clb auto 45.5 MiB 1.48 26670 58036 15610 40440 1986 81.9 MiB 1.44 0.02 6.99148 -66.5608 -6.99148 nan 1.36 0.00806264 0.0070899 0.517831 0.437174 90 47190 31 5.832e+06 5.382e+06 2.27845e+06 5696.13 36.26 4.35191 3.65217 44092 472493 -1 40435 27 13453 81187 4495090 507770 6.68929 nan -64.8044 -6.68929 0 0 2.84047e+06 7101.17 1.14 1.88 0.49 -1 -1 1.14 0.603621 0.535117 +k6_N10_40nm.xml ex5p.pre-vpr.blif common 8.89 vpr 63.96 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 82 8 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65500 8 63 761 824 0 451 153 12 12 144 clb auto 26.2 MiB 0.33 4564 13650 2467 10070 1113 64.0 MiB 0.21 0.01 4.36001 -198.144 -4.36001 nan 0.37 0.00195987 0.00169055 0.0831115 0.0735505 46 
8116 47 1.8e+06 1.476e+06 409728. 2845.33 5.88 1.06237 0.917436 11608 81817 -1 7004 30 4945 21003 896494 141905 4.47246 nan -203.996 -4.47246 0 0 527971. 3666.47 0.17 0.39 0.07 -1 -1 0.17 0.148506 0.132448 +k6_N10_40nm.xml frisc.pre-vpr.blif common 28.67 vpr 78.51 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 242 20 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80392 20 116 3175 3291 1 1258 378 18 18 324 clb auto 41.2 MiB 1.00 15098 76874 18624 53343 4907 78.5 MiB 1.16 0.02 9.88726 -5092.35 -9.88726 9.88726 1.05 0.00665797 0.00599117 0.442129 0.381299 60 25951 46 4.608e+06 4.356e+06 1.28013e+06 3951.02 13.75 2.84249 2.45024 29492 257832 -1 21144 26 8391 35581 1929606 263963 9.98478 9.98478 -5145.99 -9.98478 0 0 1.60155e+06 4943.04 0.59 0.88 0.26 -1 -1 0.59 0.4177 0.369904 +k6_N10_40nm.xml misex3.pre-vpr.blif common 7.77 vpr 64.44 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 14 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65984 14 14 828 842 0 506 114 12 12 144 clb auto 26.9 MiB 0.37 4946 8118 1259 6320 539 64.4 MiB 0.19 0.01 4.99427 -64.5608 -4.99427 nan 0.37 0.00211355 0.00186547 0.0833963 0.074194 50 7449 35 1.8e+06 1.548e+06 439064. 3049.06 4.35 0.920941 0.800099 11896 86528 -1 6915 25 4736 20579 777903 117189 4.85973 nan -63.5848 -4.85973 0 0 562980. 
3909.58 0.18 0.37 0.08 -1 -1 0.18 0.148807 0.134079 +k6_N10_40nm.xml pdc.pre-vpr.blif common 33.01 vpr 82.99 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 311 16 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84984 16 40 2839 2879 0 1521 367 20 20 400 clb auto 46.4 MiB 1.35 24941 68643 17504 47903 3236 83.0 MiB 1.44 0.03 7.11101 -257.274 -7.11101 nan 1.36 0.00744919 0.00604379 0.482496 0.405502 78 41725 45 5.832e+06 5.598e+06 2.00674e+06 5016.85 18.40 3.44483 2.89111 41300 418538 -1 35794 21 11768 65698 3038822 387442 7.18904 nan -255.486 -7.18904 0 0 2.53133e+06 6328.34 0.94 1.47 0.42 -1 -1 0.94 0.543167 0.482265 +k6_N10_40nm.xml s298.pre-vpr.blif common 5.30 vpr 63.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 77 4 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64992 4 6 726 732 1 405 87 11 11 121 clb auto 25.8 MiB 0.32 3932 4503 604 3722 177 63.5 MiB 0.12 0.00 7.20975 -57.1746 -7.20975 7.20975 0.30 0.00166347 0.00143928 0.0567918 0.0510537 44 6222 32 1.458e+06 1.386e+06 324964. 2685.65 2.08 0.534956 0.467537 9582 65203 -1 5554 23 3368 16222 637492 96172 6.94914 6.94914 -58.3727 -6.94914 0 0 420935. 
3478.80 0.13 0.31 0.06 -1 -1 0.13 0.133267 0.120808 +k6_N10_40nm.xml s38584.1.pre-vpr.blif common 42.39 vpr 87.91 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 376 38 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 90024 39 304 4677 4982 1 2202 719 22 22 484 clb auto 50.5 MiB 1.20 15134 255020 73345 166893 14782 87.9 MiB 2.48 0.03 5.66276 -3253.04 -5.66276 5.66276 1.70 0.00895338 0.0080486 0.769275 0.668321 40 23370 39 7.2e+06 6.768e+06 1.34575e+06 2780.48 12.86 4.27094 3.66185 37996 272632 -1 21030 29 11717 32882 1453579 252466 5.64983 5.64983 -3333.79 -5.64983 0 0 1.68761e+06 3486.79 0.72 1.11 0.25 -1 -1 0.72 0.683978 0.600785 +k6_N10_40nm.xml seq.pre-vpr.blif common 10.86 vpr 66.18 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 101 41 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67768 41 35 1006 1041 0 615 177 13 13 169 clb auto 28.6 MiB 0.51 6961 16049 2946 11365 1738 66.2 MiB 0.27 0.01 5.03195 -143.664 -5.03195 nan 0.45 0.00259569 0.0022472 0.102585 0.091288 60 12020 38 2.178e+06 1.818e+06 630658. 3731.70 6.85 1.30662 1.13307 15198 124941 -1 10354 24 5139 23293 962130 137023 5.06992 nan -144.115 -5.06992 0 0 788291. 
4664.44 0.25 0.44 0.12 -1 -1 0.25 0.175092 0.157621 +k6_N10_40nm.xml spla.pre-vpr.blif common 20.76 vpr 77.89 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 241 16 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 79760 16 46 2232 2278 0 1201 303 18 18 324 clb auto 40.5 MiB 1.05 17032 51153 12014 36600 2539 77.9 MiB 1.04 0.02 6.29481 -217.773 -6.29481 nan 1.01 0.00634457 0.00524369 0.372158 0.314051 70 30122 46 4.608e+06 4.338e+06 1.48298e+06 4577.10 9.54 2.28636 1.93005 31752 300704 -1 25154 21 9819 54210 2459519 309528 6.20862 nan -218.942 -6.20862 0 0 1.85205e+06 5716.21 0.67 1.10 0.31 -1 -1 0.67 0.400553 0.355764 +k6_N10_40nm.xml tseng.pre-vpr.blif common 6.78 vpr 66.42 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 105 52 -1 -1 success ee5eb02-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-16T14:20:50 gh-actions-runner-vtr-auto-spawned135 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68012 52 122 1461 1583 1 525 279 13 13 169 clb auto 28.5 MiB 0.31 3192 45829 11147 32251 2431 66.4 MiB 0.37 0.01 5.68935 -1256.78 -5.68935 5.68935 0.45 0.00254401 0.00225098 0.140974 0.124712 30 5287 29 2.178e+06 1.89e+06 350324. 2072.92 2.82 0.851944 0.744919 12006 67531 -1 4334 25 2585 6950 258089 51105 5.34065 5.34065 -1250.43 -5.34065 0 0 430798. 
2549.10 0.16 0.22 0.06 -1 -1 0.16 0.142636 0.127565 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases/config/golden_results.txt index 68062e583c5..90d9f177c7c 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases/config/golden_results.txt @@ -1,4 +1,4 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS 
hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk.sdc 0.22 vpr 56.38 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57728 1 4 28 32 2 10 9 4 4 16 clb auto 17.5 MiB 0.00 20 56.4 MiB 0.00 0.00 2.18276 0 0 2.18276 0.01 1.8962e-05 1.3558e-05 0.000245191 0.000217063 8 18 4 215576 215576 5503.53 343.971 0.01 0.00299768 0.00243283 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.00 0.00 0.000845095 0.000768207 - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk_assign.sdc 0.27 vpr 56.31 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57664 1 4 28 32 2 10 9 4 4 16 clb auto 17.5 MiB 0.00 20 56.3 MiB 0.00 0.00 2.18276 0 0 2.18276 0.01 1.974e-05 1.4252e-05 0.000246873 0.0002189 8 18 4 215576 215576 5503.53 343.971 0.01 0.00244336 0.00196958 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.00 0.00 0.000719288 0.000650332 - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/counter_clk.sdc 0.29 vpr 56.34 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57696 1 4 28 32 2 10 9 4 4 16 clb auto 17.5 MiB 0.01 20 56.3 MiB 0.00 0.00 2.18276 0 0 2.18276 0.01 1.9454e-05 1.4029e-05 0.000249021 
0.000220542 8 18 4 215576 215576 5503.53 343.971 0.01 0.00264112 0.00212219 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.00 0.00 0.000734735 0.000667954 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time 
crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk.sdc 0.33 vpr 57.89 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59280 1 4 28 32 2 10 9 4 4 16 clb auto 19.6 MiB 0.01 20 27 15 8 4 57.9 MiB 0.00 0.00 2.44626 0 0 2.44626 0.02 6.522e-05 5.8807e-05 0.000520276 0.000480282 8 12 5 72000 72000 5593.62 349.601 0.03 0.00762819 0.00638476 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.00 0.00 0.00 -1 -1 0.00 0.00235121 0.00216168 +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk_assign.sdc 0.32 vpr 57.76 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59144 1 4 28 32 2 10 9 4 4 16 clb auto 19.5 MiB 0.01 20 27 15 8 4 57.8 MiB 0.00 0.00 2.44626 0 0 2.44626 0.01 7.6909e-05 6.9134e-05 0.000530933 0.000491554 8 12 5 72000 72000 5593.62 349.601 0.03 0.00769087 0.00644081 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.00 0.00 0.00 -1 -1 0.00 0.00237431 0.00218377 +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/counter_clk.sdc 0.32 vpr 57.91 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59304 1 4 28 32 2 10 9 4 4 16 clb auto 19.6 MiB 0.01 20 27 15 8 4 57.9 MiB 0.00 0.00 2.44626 0 0 2.44626 0.01 
5.6889e-05 5.0331e-05 0.000476198 0.000438042 8 12 5 72000 72000 5593.62 349.601 0.03 0.00727055 0.00604445 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.00 0.00 0.00 -1 -1 0.00 0.00234916 0.00215934 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases_set_delay/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases_set_delay/config/golden_results.txt index f2cadd4e550..e34e4d38d17 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases_set_delay/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_aliases_set_delay/config/golden_results.txt @@ -1,2 +1,2 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength 
crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - timing/k6_N10_40nm.xml clock_set_delay_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/set_delay.sdc 0.21 vpr 56.37 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 2 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57720 2 2 22 24 2 4 6 4 4 16 clb auto 18.1 MiB 0.00 4 56.4 MiB 0.00 0.00 1.293 0 0 1.293 0.01 1.7316e-05 1.2051e-05 0.000172483 0.000143849 6 6 1 215576 107788 3924.73 245.296 0.00 0.000640147 0.000558617 9 3 5 5 233 128 1.293 1.293 0 0 0 0 5503.53 343.971 0.00 0.00 0.000444617 0.000393194 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time 
place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +timing/k6_N10_40nm.xml clock_set_delay_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/set_delay.sdc 0.30 vpr 57.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 2 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59208 2 2 22 24 2 4 6 4 4 16 clb auto 19.5 MiB 0.01 4 15 2 10 3 57.8 MiB 0.00 0.00 1.297 0 0 1.297 0.01 4.3769e-05 3.8064e-05 0.000324999 0.000293005 4 6 2 72000 36000 2827.54 176.721 0.01 0.00228834 0.0020767 644 852 -1 6 2 4 4 138 80 1.297 1.297 0 0 0 0 4025.56 251.598 0.00 0.00 0.00 -1 -1 0.00 0.00162095 0.00152882 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt index 0a89c3b897f..e7a944100ab 
100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt @@ -1,9 +1,9 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time num_global_nets num_routed_nets - timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v 
common_--clock_modeling_ideal_--route_chan_width_60 0.28 vpr 56.10 MiB -1 -1 0.05 20076 1 0.00 -1 -1 32876 -1 -1 1 2 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57444 2 1 3 4 1 3 4 3 3 9 -1 auto 17.5 MiB 0.00 4 56.1 MiB 0.00 0.00 0.571526 -0.946421 -0.571526 0.571526 0.00 7.472e-06 4.444e-06 5.2255e-05 3.5357e-05 -1 2 2 53894 53894 12370.0 1374.45 0.00 0.000150639 0.000101445 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 - timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.26 vpr 56.07 MiB -1 -1 0.05 20348 1 0.00 -1 -1 33116 -1 -1 1 2 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57412 2 1 3 4 1 3 4 3 3 9 -1 auto 17.5 MiB 0.00 6 56.1 MiB 0.00 0.00 0.526189 -0.94819 -0.526189 0.526189 0.00 7.373e-06 4.228e-06 5.4128e-05 3.6541e-05 -1 8 3 53894 53894 14028.3 1558.70 0.00 0.000159415 0.000108151 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 - timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 20.62 yosys 204.34 MiB -1 -1 16.70 209240 2 1.15 -1 -1 59832 -1 -1 155 5 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 59280 5 156 191 347 1 163 316 15 15 225 clb auto 19.6 MiB 0.02 22 57.9 MiB 0.07 0.00 1.10153 -11.3996 -1.10153 1.10153 0.01 0.000146864 0.000131212 0.011265 0.0101019 -1 38 5 9.10809e+06 8.35357e+06 828754. 
3683.35 0.00 0.0144781 0.0131086 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 - timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 20.82 yosys 204.26 MiB -1 -1 16.55 209160 2 1.15 -1 -1 59996 -1 -1 155 5 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 59440 5 156 191 347 1 163 316 15 15 225 clb auto 19.8 MiB 0.02 25 58.0 MiB 0.08 0.00 1.12309 -11.8205 -1.12309 1.12309 0.01 0.000159954 0.000141912 0.0122901 0.011012 -1 48 4 9.10809e+06 8.35357e+06 858153. 3814.01 0.00 0.0153796 0.0139115 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 - timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_ideal_--route_chan_width_60 0.34 vpr 60.93 MiB -1 -1 0.06 20324 1 0.00 -1 -1 33212 -1 -1 1 2 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 62388 2 1 3 4 1 3 4 3 3 9 -1 auto 22.2 MiB 0.00 4 60.9 MiB 0.01 0.00 0.571526 -0.946421 -0.571526 0.571526 0.00 2.0228e-05 1.4173e-05 7.1706e-05 4.9857e-05 -1 2 2 53894 53894 12370.0 1374.45 0.00 0.000190797 0.00013594 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 - timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.27 vpr 61.09 MiB -1 -1 0.06 20372 1 0.01 -1 -1 33044 -1 -1 1 2 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 62556 2 1 3 4 1 3 4 3 3 9 -1 auto 22.4 MiB 0.00 6 61.1 MiB 0.00 0.00 0.526189 -0.94819 -0.526189 0.526189 0.00 7.855e-06 4.642e-06 5.7063e-05 3.8267e-05 -1 8 3 53894 53894 14028.3 1558.70 0.00 0.000173245 
0.000119981 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 - timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 5.31 vpr 69.03 MiB -1 -1 0.84 29148 2 0.11 -1 -1 37560 -1 -1 32 311 15 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 70688 311 156 972 1128 1 953 514 28 28 784 memory auto 30.9 MiB 0.40 8912 69.0 MiB 0.82 0.02 4.4435 -4133.58 -4.4435 4.4435 0.14 0.00289443 0.00248922 0.296854 0.253031 -1 13294 11 4.25198e+07 9.94461e+06 2.96205e+06 3778.13 1.02 0.4114 0.359724 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 15 938 - timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 5.65 vpr 69.32 MiB -1 -1 0.83 29208 2 0.10 -1 -1 37556 -1 -1 32 311 15 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 70980 311 156 972 1128 1 953 514 28 28 784 memory auto 31.2 MiB 0.39 9335 69.3 MiB 0.87 0.03 4.01406 -3152.9 -4.01406 4.01406 0.18 0.00283395 0.00242386 0.302074 0.260062 -1 14070 16 4.25198e+07 9.94461e+06 3.02951e+06 3864.17 1.19 0.45079 0.397878 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 14 939 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles 
device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time num_global_nets num_routed_nets +timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_ideal_--route_chan_width_60 0.30 vpr 57.61 MiB -1 -1 0.06 19388 1 0.02 -1 -1 33516 -1 -1 1 2 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 58988 2 1 3 4 1 3 4 3 3 9 -1 auto 19.1 MiB 0.00 4 9 6 3 0 57.6 MiB 0.00 0.00 0.55447 -0.91031 -0.55447 0.55447 0.00 1.4209e-05 1.0635e-05 0.000112608 8.885e-05 -1 2 1 
18000 18000 14049.7 1561.07 0.00 0.00111531 0.00103596 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 +timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.30 vpr 57.69 MiB -1 -1 0.06 19244 1 0.02 -1 -1 33536 -1 -1 1 2 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59076 2 1 3 4 1 3 4 3 3 9 -1 auto 19.2 MiB 0.00 6 9 5 2 2 57.7 MiB 0.00 0.00 0.48631 -0.91031 -0.48631 0.48631 0.00 1.4475e-05 1.0195e-05 0.000102982 7.9111e-05 -1 4 1 18000 18000 15707.9 1745.32 0.00 0.00110914 0.00104203 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 26.57 parmys 203.92 MiB -1 -1 21.33 208816 2 1.49 -1 -1 61188 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 61088 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 22 75566 54444 2848 18274 59.7 MiB 0.07 0.00 1.49664 -15.129 -1.49664 1.49664 0.00 0.000225009 0.000209684 0.0166386 0.0154931 -1 38 6 3.042e+06 2.79e+06 863192. 
3836.41 0.01 0.0221087 0.0205962 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 26.99 parmys 204.15 MiB -1 -1 21.52 209052 2 1.49 -1 -1 60656 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 60972 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 25 77716 55619 3345 18752 59.5 MiB 0.13 0.00 1.47823 -14.9031 -1.47823 1.47823 0.00 0.000388878 0.000358886 0.0289108 0.0266306 -1 38 3 3.042e+06 2.79e+06 892591. 3967.07 0.01 0.0351201 0.0324031 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 +timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_ideal_--route_chan_width_60 0.35 vpr 63.08 MiB -1 -1 0.08 19324 1 0.02 -1 -1 33472 -1 -1 1 2 0 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64592 2 1 3 4 1 3 4 3 3 9 -1 auto 24.5 MiB 0.00 4 9 6 2 1 63.1 MiB 0.00 0.00 0.55247 -0.90831 -0.55247 0.55247 0.00 1.3129e-05 9.703e-06 0.000103951 8.1123e-05 -1 2 2 53894 53894 12370.0 1374.45 0.00 0.00116445 0.00109439 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 +timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.35 vpr 62.96 MiB -1 -1 0.08 19876 1 0.02 -1 -1 33484 -1 -1 1 2 0 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64468 2 1 3 4 1 3 4 3 3 9 -1 auto 24.3 MiB 0.00 6 9 5 2 2 63.0 MiB 0.00 0.00 0.48631 -0.90831 -0.48631 0.48631 0.00 1.5477e-05 
1.1104e-05 0.000110622 8.6576e-05 -1 8 1 53894 53894 14028.3 1558.70 0.00 0.00113491 0.00106717 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 +timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 6.10 vpr 71.24 MiB -1 -1 1.09 28164 2 0.15 -1 -1 37372 -1 -1 32 311 15 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 72952 311 156 972 1128 1 953 514 28 28 784 memory auto 33.0 MiB 0.48 8979 193966 70726 114124 9116 71.2 MiB 1.31 0.03 4.11528 -4394.91 -4.11528 4.11528 0.00 0.00488787 0.00418834 0.465058 0.395185 -1 13380 12 4.25198e+07 9.94461e+06 2.96205e+06 3778.13 0.38 0.643724 0.557601 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 15 938 +timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 6.26 vpr 71.41 MiB -1 -1 1.06 28216 2 0.15 -1 -1 37564 -1 -1 32 311 15 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 73128 311 156 972 1128 1 953 514 28 28 784 memory auto 33.2 MiB 0.48 8125 208372 75006 121666 11700 71.4 MiB 1.39 0.02 4.69946 -3846.5 -4.69946 4.69946 0.00 0.00473553 0.0040387 0.491963 0.415743 -1 12865 15 4.25198e+07 9.94461e+06 3.02951e+06 3864.17 0.41 0.692219 0.598424 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 14 939 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt index aeff2922e33..65afefc1140 100644 --- 
a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time 
crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_N10_40nm.xml stereovision0.v common 140.15 vpr 272.04 MiB -1 -1 12.85 119508 5 37.98 -1 -1 65828 -1 -1 1307 169 -1 -1 success v8.0.0-10642-gf11aaea3f release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-07-18T10:51:58 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/inc_dec_wires/vtr-verilog-to-routing/vtr_flow/tasks 278564 169 197 21166 21363 1 7566 1673 39 39 1521 clb auto 140.8 MiB 3.95 53142 974519 347458 607168 19893 272.0 MiB 10.23 0.10 3.63366 -15348.4 -3.63366 3.63366 9.71 0.0315035 0.026913 3.45179 2.89228 44 69063 46 7.37824e+07 7.04408e+07 4.68145e+06 3077.88 43.27 19.1637 16.0718 125110 968779 -1 63965 24 33902 65662 2813140 489196 3.57565 3.57565 -16119.5 -3.57565 0 0 6.05227e+06 3979.14 1.97 2.96 0.84 -1 -1 1.97 2.09287 1.80452 -k6_N10_40nm_diff_switch_for_inc_dec_wires.xml stereovision0.v common 138.68 vpr 271.91 MiB -1 -1 12.57 119440 5 36.46 -1 -1 65388 -1 -1 1307 169 -1 -1 success v8.0.0-10642-gf11aaea3f release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-07-18T10:51:58 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/inc_dec_wires/vtr-verilog-to-routing/vtr_flow/tasks 278432 169 197 21166 21363 1 7566 1673 39 39 1521 clb auto 140.7 MiB 3.96 53142 974519 347458 607168 19893 271.9 MiB 11.12 0.11 3.63366 -15348.4 -3.63366 3.63366 9.48 0.037164 0.0323029 4.04546 3.42407 44 69063 46 7.37824e+07 7.04408e+07 4.68145e+06 3077.88 43.94 19.9593 16.7643 125110 968779 -1 63965 24 33902 65662 2813140 489196 3.57565 3.57565 -16119.5 -3.57565 0 0 6.05227e+06 3979.14 2.02 2.80 0.83 -1 -1 2.02 1.87812 1.62699 +k6_N10_40nm.xml stereovision0.v common 122.65 vpr 276.58 MiB -1 -1 16.00 124916 5 53.73 -1 -1 69176 -1 -1 1305 169 -1 -1 success 28100b1 release IPO 
VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-26T22:45:19 gh-actions-runner-vtr-auto-spawned39 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 283220 169 197 21117 21314 1 7688 1671 39 39 1521 clb auto 143.1 MiB 2.85 53732 982959 351248 609845 21866 276.6 MiB 9.99 0.10 3.76204 -15507.8 -3.76204 3.76204 9.44 0.0159737 0.0133362 1.80765 1.48264 42 70545 48 2.4642e+07 2.349e+07 4.65856e+06 3062.82 15.47 6.76385 5.69 122070 947469 -1 65863 22 34742 65762 3361891 541264 4.0937 4.0937 -16235.4 -4.0937 0 0 5.79504e+06 3810.02 1.92 1.71 0.57 -1 -1 1.92 1.12349 0.992148 +k6_N10_40nm_diff_switch_for_inc_dec_wires.xml stereovision0.v common 134.93 vpr 274.07 MiB -1 -1 16.21 125172 5 55.27 -1 -1 68860 -1 -1 1305 169 -1 -1 success 28100b1 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-26T22:45:19 gh-actions-runner-vtr-auto-spawned39 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 280648 169 197 21117 21314 1 7523 1671 39 39 1521 clb auto 143.0 MiB 2.85 51811 1022607 376856 616274 29477 274.1 MiB 10.32 0.10 3.69006 -14773.1 -3.69006 3.69006 9.39 0.0156983 0.0130538 1.86259 1.52838 38 69466 48 7.37824e+07 7.0333e+07 4.16760e+06 2740.04 25.94 8.42194 7.05848 119030 845795 -1 62788 23 35878 68950 2803037 507578 3.5696 3.5696 -16170.3 -3.5696 0 0 5.22668e+06 3436.35 1.75 1.68 0.49 -1 -1 1.75 1.12315 0.986398 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/config.txt new file mode 100644 index 00000000000..3897e7c8b98 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/config.txt @@ -0,0 +1,28 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/titan_blif/other_benchmarks/stratixiv + +# 
Path to directory of architectures to use +archs_dir=arch/multi_die/simple_arch + +# Add circuits to list to sweep +circuit_list_add=ucsb_152_tap_fir_stratixiv_arch_timing.blif + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_40nm.xml + +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt +parse_file=vpr_parse_second_file.txt + +# How to parse QoR info +qor_parse_file=qor_rr_graph.txt + +# Pass requirements +pass_requirements_file=pass_requirements_verify_rr_graph.txt + +# Script parameters +script_params = -verify_rr_graph --route_chan_width 100 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/golden_results.txt new file mode 100644 index 00000000000..3d6020c77de --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_verify_rr_graph_3d/config/golden_results.txt @@ -0,0 +1,3 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time routed_wirelength total_nets_routed total_connections_routed total_heap_pushes 
total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem router_lookahead_computation_time + k4_N4_90nm.xml stereovision3.v common 1.67 vpr 56.43 MiB -1 -1 0.45 25384 6 0.13 -1 -1 35724 -1 -1 26 10 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 57788 10 2 186 188 1 49 38 8 8 64 clb auto 18.2 MiB 0.02 224 56.4 MiB 0.01 0.00 2.40278 -103.067 -2.40278 2.40278 0.00 0.000119899 9.7708e-05 0.00253947 0.0021251 203 186 417 22870 4288 80255.5 57962.3 276194. 4315.53 12 2.46522 2.46522 -111.211 -2.46522 -0.0734 -0.0734 56.4 MiB 0.01 0.00769657 0.00673067 56.4 MiB 0.03 + k6_frac_N10_40nm.xml stereovision3.v common 1.49 vpr 57.75 MiB -1 -1 0.45 25728 5 0.13 -1 -1 35988 -1 -1 7 10 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 59136 10 2 181 183 1 37 19 5 5 25 clb auto 19.5 MiB 0.05 109 57.8 MiB 0.01 0.00 1.93928 -79.4364 -1.93928 1.93928 0.00 0.000117405 9.5811e-05 0.00377247 0.00328288 95 62 85 1742 528 485046 377258 99699.4 3987.98 5 2.07705 2.07705 -87.1807 -2.07705 0 0 57.8 MiB 0.01 0.00934793 0.00856098 57.8 MiB 0.01 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases/config/golden_results.txt index 06d1734c750..39d7267adf0 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases/config/golden_results.txt +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases/config/golden_results.txt @@ -1,4 +1,4 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk.sdc 0.54 vpr 54.49 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6793-gb52911b9f release IPO 
VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2022-11-27T15:52:14 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/pack_refactor/vtr-verilog-to-routing 55800 1 4 28 32 2 10 9 4 4 16 clb auto 15.8 MiB 0.03 20 54.5 MiB 0.00 0.00 2.18276 0 0 2.18276 0.02 2.9141e-05 2.1776e-05 0.000384601 0.00034525 8 18 4 215576 215576 5503.53 343.971 0.03 0.00411474 0.00340189 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.00 0.00 0.000888863 0.000804989 - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk_assign.sdc 0.66 vpr 54.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6793-gb52911b9f release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2022-11-27T15:52:14 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/pack_refactor/vtr-verilog-to-routing 56048 1 4 28 32 2 10 9 4 4 16 clb auto 15.9 MiB 0.02 20 54.7 MiB 0.03 0.00 2.18276 0 0 2.18276 0.02 5.182e-05 4.0804e-05 0.000410032 0.000361391 8 18 4 215576 215576 5503.53 343.971 0.06 0.00413673 0.00341089 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.00 0.00 0.000908048 0.000820443 - timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/counter_clk.sdc 0.57 vpr 54.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success v8.0.0-6793-gb52911b9f release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2022-11-27T15:52:14 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/pack_refactor/vtr-verilog-to-routing 56080 1 4 28 32 2 10 9 4 4 16 clb auto 15.9 MiB 0.01 20 54.8 MiB 0.00 0.00 2.18276 0 0 2.18276 0.01 2.5851e-05 1.9164e-05 0.000325142 0.000288851 8 18 4 215576 215576 5503.53 343.971 0.03 0.0040566 0.00334035 12 4 18 18 422 145 2.20417 2.20417 0 0 0 0 6317.10 394.819 0.01 0.00 0.00112307 0.0010329 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time 
max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk.sdc 5.41 vpr 210.32 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 
-1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64611 1 4 28 32 2 10 9 4 4 16 clb auto 51.6 MiB 0.14 20 27 15 8 4 193.4 MiB 0.03 0.00 2.44626 0 0 2.44626 0.51 0.000583141 0.000535113 0.00311475 0.00261673 8 12 5 72000 72000 5593.62 349.601 2.10 0.0862302 0.0767113 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.01 0.04 0.17 -1 -1 0.01 0.0109455 0.00960207 +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/clk_assign.sdc 7.10 vpr 394.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 80782 1 4 28 32 2 10 9 4 4 16 clb auto 51.6 MiB 0.14 20 27 15 8 4 193.6 MiB 0.03 0.00 2.44626 0 0 2.44626 0.51 0.000585834 0.000538181 0.00313704 0.0026163 8 12 5 72000 72000 5593.62 349.601 2.10 0.0854666 0.0759832 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.01 0.04 0.17 -1 -1 0.01 0.0110022 0.00964644 +timing/k6_N10_40nm.xml clock_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/counter_clk.sdc 5.33 vpr 210.00 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 1 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64510 1 4 28 32 2 10 9 4 4 16 clb auto 51.7 MiB 0.14 20 27 15 8 4 193.7 MiB 0.03 0.00 2.44626 0 0 2.44626 0.50 0.000588315 0.000539577 0.00308844 0.0026033 8 12 5 72000 72000 5593.62 349.601 2.09 0.0840539 0.0747826 672 1128 -1 12 6 24 24 485 152 2.38921 2.38921 0 0 0 0 6492.02 405.751 0.01 0.04 0.17 -1 -1 0.01 0.0100088 0.0087372 diff --git 
a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases_set_delay/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases_set_delay/config/golden_results.txt index 716bd84b5d9..67438b42e30 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases_set_delay/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_aliases_set_delay/config/golden_results.txt @@ -1,2 +1,2 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS 
hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - timing/k6_N10_40nm.xml clock_set_delay_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/set_delay.sdc 0.70 vpr 54.43 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 2 -1 -1 success v8.0.0-6793-gb52911b9f release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2022-11-27T15:52:14 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/pack_refactor/vtr-verilog-to-routing 55736 2 2 22 24 2 4 6 4 4 16 clb auto 16.0 MiB 0.00 4 54.4 MiB 0.00 0.00 1.293 0 0 1.293 0.02 3.0248e-05 2.3255e-05 0.00029412 0.000252171 6 6 1 215576 107788 3924.73 245.296 0.09 0.0011793 0.00104221 9 3 5 5 233 128 1.293 1.293 0 0 0 0 5503.53 343.971 0.00 0.06 0.000728173 0.000642893 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used 
min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +timing/k6_N10_40nm.xml clock_set_delay_aliases.blif common_-sdc_file_sdc/samples/clock_aliases/set_delay.sdc 4.81 vpr 207.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 2 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 63825 2 2 22 24 2 4 6 4 4 16 clb auto 51.5 MiB 0.10 4 15 2 10 3 192.8 MiB 0.02 0.00 1.297 0 0 1.297 0.49 0.000472548 0.000433891 0.00258794 0.00211333 4 6 2 72000 36000 2827.54 176.721 1.75 0.0226752 0.0196524 644 852 -1 6 2 4 4 138 80 1.297 1.297 0 0 0 0 4025.56 251.598 0.01 0.02 0.16 -1 -1 0.01 0.00604643 0.0051332 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_modeling/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_modeling/config/golden_results.txt index b8c6939d726..ffbc6a68d97 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_modeling/config/golden_results.txt +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_modeling/config/golden_results.txt @@ -1,9 +1,9 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops 
crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time num_global_nets num_routed_nets -timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 0.24 vpr 55.66 MiB 0.00 5216 -1 -1 1 0.00 -1 -1 32328 -1 -1 1 2 -1 -1 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 56996 2 1 3 4 1 3 4 3 3 9 -1 auto 16.5 MiB 0.00 4 55.7 MiB 0.00 0.00 0.571526 -0.946421 -0.571526 0.571526 0.00 7.621e-06 5.224e-06 6.7874e-05 5.0802e-05 -1 
2 2 53894 53894 12370.0 1374.45 0.00 0.000205736 0.000155947 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 0.23 vpr 55.70 MiB 0.00 5212 -1 -1 1 0.01 -1 -1 32344 -1 -1 1 2 -1 -1 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 57032 2 1 3 4 1 3 4 3 3 9 -1 auto 16.6 MiB 0.00 6 55.7 MiB 0.00 0.00 0.526189 -0.94819 -0.526189 0.526189 0.00 7.761e-06 5.265e-06 6.7961e-05 5.0472e-05 -1 4 1 53894 53894 14028.3 1558.70 0.00 0.000187922 0.00014281 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 -timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 4.64 vpr 58.30 MiB 0.36 58984 -1 -1 2 1.50 -1 -1 48756 -1 -1 155 5 -1 -1 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 59700 5 156 191 347 1 163 316 15 15 225 clb auto 19.7 MiB 0.05 22 58.3 MiB 0.20 0.00 1.10064 -11.4028 -1.10064 1.10064 0.02 0.000207655 0.000184614 0.0172662 0.0153484 -1 34 4 9.10809e+06 8.35357e+06 828754. 
3683.35 0.01 0.0215404 0.0193498 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 -timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 4.63 vpr 57.95 MiB 0.36 59028 -1 -1 2 1.45 -1 -1 48796 -1 -1 155 5 -1 -1 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 59344 5 156 191 347 1 163 316 15 15 225 clb auto 19.5 MiB 0.05 25 58.0 MiB 0.20 0.00 1.08173 -11.7171 -1.08173 1.08173 0.02 0.000210434 0.000187171 0.0177434 0.0157333 -1 56 4 9.10809e+06 8.35357e+06 858153. 3814.01 0.01 0.0220048 0.0197235 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 -timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 0.30 vpr 60.97 MiB 0.03 5868 -1 -1 1 0.00 -1 -1 32324 -1 -1 1 2 0 0 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 62436 2 1 3 4 1 3 4 3 3 9 -1 auto 22.2 MiB 0.00 4 61.0 MiB 0.00 0.00 0.571526 -0.946421 -0.571526 0.571526 0.00 7.366e-06 4.962e-06 6.4942e-05 4.834e-05 -1 2 2 53894 53894 12370.0 1374.45 0.00 0.000199931 0.000152012 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 0.29 vpr 61.36 
MiB 0.02 5840 -1 -1 1 0.01 -1 -1 32312 -1 -1 1 2 0 0 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 62828 2 1 3 4 1 3 4 3 3 9 -1 auto 22.4 MiB 0.00 6 61.4 MiB 0.00 0.00 0.526189 -0.94819 -0.526189 0.526189 0.00 7.761e-06 5.205e-06 6.9109e-05 5.2607e-05 -1 4 1 53894 53894 14028.3 1558.70 0.00 0.000190744 0.000146909 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 -timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 6.17 vpr 69.56 MiB 0.21 16208 -1 -1 2 0.15 -1 -1 37780 -1 -1 32 311 15 0 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 71228 311 156 972 1128 1 953 514 28 28 784 memory auto 31.1 MiB 0.57 8510 69.6 MiB 1.42 0.02 3.82722 -4064.49 -3.82722 3.82722 0.26 0.00342284 0.00289929 0.387553 0.329277 -1 13023 14 4.25198e+07 9.94461e+06 2.96205e+06 3778.13 1.10 0.5346 0.464391 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 15 938 -timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 6.41 vpr 69.44 MiB 0.18 16292 -1 -1 2 0.13 -1 -1 37668 -1 -1 32 311 15 0 success v8.0.0-7649-g3eb9a4e-dirty Release IPO VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-20T17:52:50 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 71104 311 156 972 1128 1 953 514 28 28 784 memory auto 30.9 MiB 0.58 8543 69.4 MiB 1.40 0.02 
4.32962 -3179.64 -4.32962 4.32962 0.21 0.00343224 0.00290294 0.37301 0.318522 -1 13270 16 4.25198e+07 9.94461e+06 3.02951e+06 3864.17 1.34 0.531097 0.463938 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 14 939 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS 
crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time num_global_nets num_routed_nets +timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 2.93 vpr 373.06 MiB 0.14 20488 -1 -1 1 0.02 -1 -1 33464 -1 -1 1 2 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 76402 2 1 3 4 1 3 4 3 3 9 -1 auto 49.4 MiB 0.02 4 9 3 5 1 184.5 MiB 0.01 0.00 0.55447 -0.91031 -0.55447 0.55447 0.00 0.000175279 0.000165491 0.000976401 0.000767979 -1 2 4 18000 18000 14049.7 1561.07 0.01 0.00449048 0.00368003 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 +timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 1.28 vpr 188.73 MiB 0.14 20620 -1 -1 1 0.02 -1 -1 33596 -1 -1 1 2 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 77305 2 1 3 4 1 3 4 3 3 9 -1 auto 49.8 MiB 0.02 6 9 3 3 3 184.7 MiB 0.01 0.00 0.48631 -0.91031 -0.48631 0.48631 0.00 0.000179683 0.000169331 0.00104574 0.000777636 -1 4 3 18000 18000 15707.9 1745.32 0.01 0.00434022 0.00346833 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 37.69 odin 761.83 MiB 14.13 780112 -1 -1 2 1.41 -1 -1 54088 -1 -1 155 5 -1 -1 success 5941692-dirty release 
IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67103 5 156 191 347 1 163 316 15 15 225 clb auto 72.2 MiB 1.21 22 86316 62090 3287 20939 314.4 MiB 0.1654 0.11 1.49664 -15.129 -1.49664 1.49664 0.00 0.00426637 0.00401077 0.352304 0.330517 -1 30 6 3.042e+06 2.79e+06 863192. 3836.41 0.18 0.426089 0.399481 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 37.85 odin 761.64 MiB 14.27 779924 -1 -1 2 1.37 -1 -1 54552 -1 -1 155 5 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67222 5 156 191 347 1 163 316 15 15 225 clb auto 71.9 MiB 1.14 25 86316 61881 3554 20881 315.0 MiB 0.167 0.10 1.47767 -14.8876 -1.47767 1.47767 0.00 0.00427632 0.00401248 0.356421 0.332972 -1 53 7 3.042e+06 2.79e+06 892591. 
3967.07 0.20 0.438475 0.409721 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 +timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 6.72 vpr 218.05 MiB 2.19 38292 -1 -1 1 0.02 -1 -1 33576 -1 -1 1 2 0 0 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66984 2 1 3 4 1 3 4 3 3 9 -1 auto 76.3 MiB 0.03 4 9 3 5 1 213.1 MiB 0.01 0.00 0.55247 -0.90831 -0.55247 0.55247 0.00 0.000167702 0.000158607 0.000975812 0.00076565 -1 2 3 53894 53894 12370.0 1374.45 0.01 0.00428013 0.00347708 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 +timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 6.85 vpr 218.27 MiB 2.22 38424 -1 -1 1 0.02 -1 -1 33516 -1 -1 1 2 0 0 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67051 2 1 3 4 1 3 4 3 3 9 -1 auto 76.6 MiB 0.03 6 9 3 3 3 213.5 MiB 0.01 0.00 0.48631 -0.90831 -0.48631 0.48631 0.00 0.000179672 0.000169578 0.00106215 0.000790093 -1 4 2 53894 53894 14028.3 1558.70 0.01 0.00420473 0.00335158 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 +timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_ideal_--route_chan_width_60 109.92 odin 592.27 MiB 9.47 606488 -1 -1 2 0.15 -1 -1 37288 -1 -1 32 311 15 0 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 110016 311 156 972 1128 1 953 514 28 28 784 memory auto 194.9 MiB 
0.5255 8852 208372 78139 120196 10037 513.9 MiB 2.052 0.47 4.11307 -4320.89 -4.11307 4.11307 0.02 0.112519 0.10601 11.801 11.1176 -1 13295 15 4.25198e+07 9.94461e+06 2.96205e+06 3778.13 0.496 15.7544 14.9174 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 15 938 +timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_-start_odin_--clock_modeling_route_--route_chan_width_60 111.57 odin 592.08 MiB 9.49 606288 -1 -1 2 0.16 -1 -1 37464 -1 -1 32 311 15 0 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 110642 311 156 972 1128 1 953 514 28 28 784 memory auto 195.1 MiB 0.527 8741 214546 82147 122429 9970 513.6 MiB 2.10 0.47 4.83167 -3665.82 -4.83167 4.83167 0.02 0.112604 0.106159 12.0753 11.3955 -1 13585 17 4.25198e+07 9.94461e+06 3.02951e+06 3864.17 0.536 16.4333 15.5853 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 14 939 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt index 873a4df06d4..8e8f487bfc9 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_diff_mux_for_inc_dec_wires/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets 
num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_N10_40nm.xml stereovision0.v common 170.85 vpr 277.04 MiB 2.40 125884 -1 -1 5 83.06 -1 -1 75508 -1 -1 1297 157 -1 -1 success v8.0.0-10644-gbada3f40f release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-07-18T12:52:25 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/inc_dec_wires/vtr-verilog-to-routing/vtr_flow/tasks 283692 157 197 21024 21221 1 7547 1651 39 39 1521 clb auto 144.7 MiB 3.96 51912 
967297 355681 587577 24039 277.0 MiB 10.10 0.10 3.27987 -14557.4 -3.27987 3.27987 9.63 0.0317477 0.0266474 3.48443 2.90114 46 64729 31 7.37824e+07 6.99019e+07 4.88195e+06 3209.70 35.95 15.7549 13.2195 126630 998267 -1 62442 28 35421 67860 2863040 490307 3.17524 3.17524 -15310.6 -3.17524 0 0 6.27360e+06 4124.65 1.97 3.27 0.83 -1 -1 1.97 2.34241 2.01697 -k6_N10_40nm_diff_switch_for_inc_dec_wires.xml stereovision0.v common 170.38 vpr 277.10 MiB 2.43 126080 -1 -1 5 82.80 -1 -1 75404 -1 -1 1297 157 -1 -1 success v8.0.0-10644-gbada3f40f release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-07-18T12:52:25 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/inc_dec_wires/vtr-verilog-to-routing/vtr_flow/tasks 283748 157 197 21024 21221 1 7547 1651 39 39 1521 clb auto 144.6 MiB 4.05 51912 967297 355681 587577 24039 277.1 MiB 10.27 0.10 3.27987 -14557.4 -3.27987 3.27987 9.61 0.0323462 0.0271905 3.52052 2.92391 46 64729 31 7.37824e+07 6.99019e+07 4.88195e+06 3209.70 34.80 15.1519 12.6486 126630 998267 -1 62442 28 35421 67860 2863040 490307 3.17524 3.17524 -15310.6 -3.17524 0 0 6.27360e+06 4124.65 1.87 3.85 0.81 -1 -1 1.87 2.71824 2.34292 +k6_N10_40nm.xml stereovision0.v common 1549.26 odin 1.76 GiB 98.39 1840428 -1 -1 5 90.37 -1 -1 79020 -1 -1 1290 157 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 346357.2 157 197 21024 21221 1 7745 1644 38 38 1444 clb auto 783.3 MiB 4.4568 54476 961845 345093 595347 21405 1014.1 MiB 13.94 2.34 4.1021 -15015.6 -4.1021 4.1021 129.29 0.389902 0.346585 40.7176 35.9264 46 66086 26 2.3328e+07 2.322e+07 4.77644e+06 3307.78 21.28 144.943 131.243 120184 989140 -1 64416 19 33992 65277 2895642 459795 3.87727 3.87727 -15760.1 -3.87727 0 0 6.15323e+06 4261.24 6.84 1.731 15.59 -1 -1 6.84 8.80011 8.1096 +k6_N10_40nm_diff_switch_for_inc_dec_wires.xml 
stereovision0.v common 1603.48 odin 1.76 GiB 98.56 1840680 -1 -1 5 90.29 -1 -1 79364 -1 -1 1297 157 -1 -1 success 5941692-dirty release IPO VTR_ASSERT_LEVEL=3 sanitizers GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-27T23:00:35 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 352214 157 197 21024 21221 1 7547 1651 39 39 1521 clb auto 790.0 MiB 4.62 51912 967297 355681 587577 24039 1018.2 MiB 13.905 2.35 3.27987 -14557.4 -3.27987 3.27987 136.16 0.388182 0.340036 40.7003 35.6992 46 64729 31 7.37824e+07 6.99019e+07 4.88195e+06 3209.70 22.73 147.381 133.258 126630 998267 -1 62442 28 35421 67860 2863040 490307 3.17524 3.17524 -15310.6 -3.17524 0 0 6.27360e+06 4124.65 7.18 2.33 16.28 -1 -1 7.18 12.2122 11.2365