diff --git a/.github/scripts/hostsetup.sh b/.github/scripts/hostsetup.sh index 48f56a066a9..a136f61a43e 100755 --- a/.github/scripts/hostsetup.sh +++ b/.github/scripts/hostsetup.sh @@ -69,7 +69,8 @@ apt install -y \ default-jdk \ g++-9 \ gcc-9 \ - wget + wget \ + libtbb-dev # installing the latest version of cmake apt install -y apt-transport-https ca-certificates gnupg diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2b3eae608ec..111b1035203 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,6 +60,7 @@ jobs: - name: Execute test script run: stdbuf -i0 -o0 -e0 ./.github/scripts/run-vtr.sh env: + VPR_NUM_WORKERS: 4 VTR_TEST: ${{ matrix.test }} VTR_TEST_OPTIONS: ${{ matrix.options }} VTR_CMAKE_PARAMS: ${{ matrix.cmake }} diff --git a/dev/pylint_check.py b/dev/pylint_check.py index 0231480746b..6e0987a7f84 100755 --- a/dev/pylint_check.py +++ b/dev/pylint_check.py @@ -216,6 +216,9 @@ def main(): cmd = ["pylint", path, "-s", "n"] if ignore_list: cmd.append("--disable=" + ",".join(ignore_list)) + # Don't object to single-letter variable names (that's not in PEP8) + # see https://stackoverflow.com/q/21833872 + cmd.append("--variable-rgx=[a-z][a-z0-9_]{0,40}$") # Run pylint and check output process = subprocess.run(cmd, check=False, stdout=subprocess.PIPE) diff --git a/doc/src/Images/view_menu.png b/doc/src/Images/view_menu.png new file mode 100644 index 00000000000..e589d00476e Binary files /dev/null and b/doc/src/Images/view_menu.png differ diff --git a/doc/src/vpr/graphics.rst b/doc/src/vpr/graphics.rst index 65a9048a262..1ab3459fccd 100644 --- a/doc/src/vpr/graphics.rst +++ b/doc/src/vpr/graphics.rst @@ -179,6 +179,17 @@ Each block can contain a number of flip flops (ff), look up tables (lut), and ot Visualizing Block Internals +View Menu +----------------------------- +.. 
figure:: ../Images/view_menu.png + :align: center + + Items under view menu + +The view menu is displayed when vpr is targeting a stacked multi-die architecture (more than 1 layer). +Layers are drawn in ascending order for many drawing features (e.g. blocks); that is, layer 0 is drawn first and, if visible, layer 1 is drawn on top of it, and so on. +The visibility and transparency of a layer can be changed, which will affect blocks, nets, routing, and critical path. +Cross-layer connections refer to connections that are in different layers. Button Description Table ------------------------ diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h index 843aa582f12..20f24fcd4f6 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 38649d034e0edccbcb511ddb8915cdff */ #include @@ -82,12 +82,12 @@ template inline void load_block_types(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_grid_loc(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, int* layer, const 
std::function * report_error); +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error); template inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_node_loc(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); template inline void load_node_timing(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); inline void load_node_timing_required_attributes(const pugi::xml_node &root, float * C, float * R, const std::function * report_error); @@ -269,8 +269,8 @@ constexpr const char *atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types {BLOCK_TYPE}; constexpr const char *gtok_lookup_t_block_types[] = {"block_type"}; -enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, X, Y, LAYER}; -constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "width_offset", "x", "y", "layer"}; +enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, LAYER, WIDTH_OFFSET, X, Y}; +constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "layer", "width_offset", "x", "y"}; enum class gtok_t_grid_locs {GRID_LOC}; constexpr const char *gtok_lookup_t_grid_locs[] = {"grid_loc"}; @@ -1015,21 +1015,19 @@ inline atok_t_grid_loc lex_attr_t_grid_loc(const char *in, const 
std::function * report_error){ +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error){ std::bitset<6> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_grid_loc in = lex_attr_t_grid_loc(attr.name(), report_error); if(astate[(int)in] == 0) astate[(int)in] = 1; else noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); switch(in){ - case atok_t_grid_loc::BLOCK_TYPE_ID: + case atok_t_grid_loc::BLOCK_TYPE_ID: *block_type_id = load_int(attr.value(), report_error); break; case atok_t_grid_loc::HEIGHT_OFFSET: *height_offset = load_int(attr.value(), report_error); break; + case atok_t_grid_loc::LAYER: + /* Attribute layer set after element init */ + break; case atok_t_grid_loc::WIDTH_OFFSET: *width_offset = load_int(attr.value(), report_error); break; @@ -2330,16 +2331,14 @@ inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * case atok_t_grid_loc::Y: *y = load_int(attr.value(), report_error); break; - case atok_t_grid_loc::LAYER: - *layer=load_int(attr.value(), report_error); default: break; /* Not possible. 
*/ } } - std::bitset<6> test_astate = astate | std::bitset<6>(0b000000); + std::bitset<6> test_astate = astate | std::bitset<6>(0b000100); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_grid_loc, report_error); } -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ std::bitset<7> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); @@ -2347,7 +2346,7 @@ inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * else noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); switch(in){ case atok_t_node_loc::LAYER: - *layer = load_int(attr.value(), report_error); + /* Attribute layer set after element init */ break; case atok_t_node_loc::PTC: *ptc = load_int(attr.value(), report_error); @@ -2370,7 +2369,7 @@ inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * default: break; /* Not possible. */ } } - std::bitset<7> test_astate = astate | std::bitset<7>(0b0000100); + std::bitset<7> test_astate = astate | std::bitset<7>(0b0000101); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_loc, report_error); } @@ -3158,6 +3157,30 @@ inline void load_grid_loc(const pugi::xml_node &root, T &out, Context &context, // Update current file offset in case an error is encountered. 
*offset_debug = root.offset_debug(); + for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ + atok_t_grid_loc in = lex_attr_t_grid_loc(attr.name(), report_error); + switch(in){ + case atok_t_grid_loc::BLOCK_TYPE_ID: + /* Attribute block_type_id is already set */ + break; + case atok_t_grid_loc::HEIGHT_OFFSET: + /* Attribute height_offset is already set */ + break; + case atok_t_grid_loc::LAYER: + out.set_grid_loc_layer(load_int(attr.value(), report_error), context); + break; + case atok_t_grid_loc::WIDTH_OFFSET: + /* Attribute width_offset is already set */ + break; + case atok_t_grid_loc::X: + /* Attribute x is already set */ + break; + case atok_t_grid_loc::Y: + /* Attribute y is already set */ + break; + default: break; /* Not possible. */ + } + } if(root.first_child().type() == pugi::node_element) noreturn_report(report_error, "Unexpected child element in ."); @@ -3224,10 +3247,8 @@ inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, memset(&grid_loc_x, 0, sizeof(grid_loc_x)); int grid_loc_y; memset(&grid_loc_y, 0, sizeof(grid_loc_y)); - int grid_loc_layer; - memset(&grid_loc_layer,0,sizeof(grid_loc_layer)); - load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, &grid_loc_layer, report_error); - auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y, grid_loc_layer); + load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, report_error); + auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y); load_grid_loc(node, out, child_context, report_error, offset_debug); out.finish_grid_locs_grid_loc(child_context); } @@ -3252,7 +3273,7 @@ 
inline void load_node_loc(const pugi::xml_node &root, T &out, Context &context, atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); switch(in){ case atok_t_node_loc::LAYER: - /* Attribute layer is already set */ + out.set_node_loc_layer(load_int(attr.value(), report_error), context); break; case atok_t_node_loc::PTC: /* Attribute ptc is already set */ @@ -3435,8 +3456,6 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons switch(in){ case gtok_t_node::LOC: { - int node_loc_layer; - memset(&node_loc_layer, 0, sizeof(node_loc_layer)); int node_loc_ptc; memset(&node_loc_ptc, 0, sizeof(node_loc_ptc)); int node_loc_xhigh; @@ -3447,8 +3466,8 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons memset(&node_loc_yhigh, 0, sizeof(node_loc_yhigh)); int node_loc_ylow; memset(&node_loc_ylow, 0, sizeof(node_loc_ylow)); - load_node_loc_required_attributes(node, &node_loc_layer, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); - auto child_context = out.init_node_loc(context, node_loc_layer, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); + load_node_loc_required_attributes(node, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); + auto child_context = out.init_node_loc(context, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); load_node_loc(node, out, child_context, report_error, offset_debug); out.finish_node_loc(child_context); } @@ -3946,11 +3965,10 @@ inline void write_grid_locs(T &in, std::ostream &os, Context &context){ os << "\n"; } } diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h index f59e1d24629..8bea9c7c033 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_capnp.h @@ -4,9 +4,9 @@ * 
https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcap.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 + * Cmdline: uxsdcxx/uxsdcap.py /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 38649d034e0edccbcb511ddb8915cdff */ #include @@ -672,6 +672,7 @@ inline void load_grid_loc_capnp_type(const ucap::GridLoc::Reader &root, T &out, (void)report_error; (void)stack; + out.set_grid_loc_layer(root.getLayer(), context); } template @@ -687,7 +688,7 @@ inline void load_grid_locs_capnp_type(const ucap::GridLocs::Reader &root, T &out auto data = root.getGridLocs(); out.preallocate_grid_locs_grid_loc(context, data.size()); for(const auto & el : data) { - auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY(), el.getLayer()); + auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY()); load_grid_loc_capnp_type(el, out, child_context, report_error, stack); out.finish_grid_locs_grid_loc(child_context); stack->back().second += 1; @@ -704,6 +705,7 @@ inline void load_node_loc_capnp_type(const ucap::NodeLoc::Reader &root, T &out, (void)report_error; (void)stack; + out.set_node_loc_layer(root.getLayer(), context); out.set_node_loc_side(conv_enum_loc_side(root.getSide(), report_error), context); } @@ -775,7 +777,7 @@ inline void load_node_capnp_type(const ucap::Node::Reader &root, T &out, Context stack->push_back(std::make_pair("getLoc", 0)); if (root.hasLoc()) { auto child_el = root.getLoc(); - auto child_context = 
out.init_node_loc(context, child_el.getLayer(), child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); + auto child_context = out.init_node_loc(context, child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); load_node_loc_capnp_type(child_el, out, child_context, report_error, stack); out.finish_node_loc(child_context); } @@ -1117,6 +1119,7 @@ inline void write_grid_locs_capnp_type(T &in, ucap::GridLocs::Builder &root, Con auto child_context = in.get_grid_locs_grid_loc(i, context); grid_locs_grid_loc.setBlockTypeId(in.get_grid_loc_block_type_id(child_context)); grid_locs_grid_loc.setHeightOffset(in.get_grid_loc_height_offset(child_context)); + grid_locs_grid_loc.setLayer(in.get_grid_loc_layer(child_context)); grid_locs_grid_loc.setWidthOffset(in.get_grid_loc_width_offset(child_context)); grid_locs_grid_loc.setX(in.get_grid_loc_x(child_context)); grid_locs_grid_loc.setY(in.get_grid_loc_y(child_context)); diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h index 9a61c8cbe12..d31082159a7 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. 
* - * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd - * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 38649d034e0edccbcb511ddb8915cdff */ #include @@ -330,9 +330,9 @@ class RrGraphBase { /** Generated for complex type "grid_loc": * + * * * - * * * * @@ -340,10 +340,11 @@ class RrGraphBase { */ virtual inline int get_grid_loc_block_type_id(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_height_offset(typename ContextTypes::GridLocReadContext &ctx) = 0; + virtual inline int get_grid_loc_layer(typename ContextTypes::GridLocReadContext &ctx) = 0; + virtual inline void set_grid_loc_layer(int layer, typename ContextTypes::GridLocWriteContext &ctx) = 0; virtual inline int get_grid_loc_width_offset(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_x(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_y(typename ContextTypes::GridLocReadContext &ctx) = 0; - virtual inline int get_grid_loc_layer(typename ContextTypes::GridLocReadContext &ctx) =0; /** Generated for complex type "grid_locs": * @@ -353,14 +354,14 @@ class RrGraphBase { * */ virtual inline void preallocate_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, size_t size) = 0; - virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) = 0; + virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, 
int block_type_id, int height_offset, int width_offset, int x, int y) = 0; virtual inline void finish_grid_locs_grid_loc(typename ContextTypes::GridLocWriteContext &ctx) = 0; virtual inline size_t num_grid_locs_grid_loc(typename ContextTypes::GridLocsReadContext &ctx) = 0; virtual inline typename ContextTypes::GridLocReadContext get_grid_locs_grid_loc(int n, typename ContextTypes::GridLocsReadContext &ctx) = 0; /** Generated for complex type "node_loc": * - * + * * * * @@ -370,6 +371,7 @@ class RrGraphBase { * */ virtual inline int get_node_loc_layer(typename ContextTypes::NodeLocReadContext &ctx) = 0; + virtual inline void set_node_loc_layer(int layer, typename ContextTypes::NodeLocWriteContext &ctx) = 0; virtual inline int get_node_loc_ptc(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline enum_loc_side get_node_loc_side(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline void set_node_loc_side(enum_loc_side side, typename ContextTypes::NodeLocWriteContext &ctx) = 0; @@ -440,7 +442,7 @@ class RrGraphBase { virtual inline void set_node_direction(enum_node_direction direction, typename ContextTypes::NodeWriteContext &ctx) = 0; virtual inline unsigned int get_node_id(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline enum_node_type get_node_type(typename ContextTypes::NodeReadContext &ctx) = 0; - virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; + virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; virtual inline void finish_node_loc(typename ContextTypes::NodeLocWriteContext &ctx) = 0; virtual inline typename ContextTypes::NodeLocReadContext get_node_loc(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline typename ContextTypes::NodeTimingWriteContext 
init_node_timing(typename ContextTypes::NodeWriteContext &ctx, float C, float R) = 0; diff --git a/libs/librrgraph/src/io/rr_graph.xsd b/libs/librrgraph/src/io/rr_graph.xsd index 4c05adfe5d5..17dfe09a7e0 100644 --- a/libs/librrgraph/src/io/rr_graph.xsd +++ b/libs/librrgraph/src/io/rr_graph.xsd @@ -208,7 +208,7 @@ - + @@ -259,7 +259,7 @@ - + diff --git a/libs/librrgraph/src/io/rr_graph_reader.cpp b/libs/librrgraph/src/io/rr_graph_reader.cpp index 16a340c08d5..a62f41d84d9 100644 --- a/libs/librrgraph/src/io/rr_graph_reader.cpp +++ b/libs/librrgraph/src/io/rr_graph_reader.cpp @@ -1,4 +1,4 @@ -/*This function loads in a routing resource graph written in xml format +/* This function loads in a routing resource graph written in xml format * into vpr when the option --read_rr_graph is specified. * When it is not specified the build_rr_graph function is then called. * This is done using the libpugixml library. This is useful @@ -11,7 +11,7 @@ * to ensure it matches. An error will through if any feature does not match. * Other elements such as edges, nodes, and switches * are overwritten by the rr graph file if one is specified. 
If an optional - * identifier such as capacitance is not specified, it is set to 0*/ + * identifier such as capacitance is not specified, it is set to 0 */ #include "rr_graph_reader.h" diff --git a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h index c0bfd38b7c6..9154e353643 100644 --- a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h +++ b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h @@ -326,6 +326,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // Initialize internal data init_side_map(); init_segment_inf_x_y(); + curr_tmp_block_type_id = -1; + curr_tmp_height_offset = -1; + curr_tmp_width_offset = -1; + curr_tmp_layer = 0; + curr_tmp_x = -1; + curr_tmp_y = -1; } /* A truth table to help understand the conversion from VPR side mask to uxsd side code @@ -686,12 +692,13 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * */ - inline int init_node_loc(int& inode, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { + inline int init_node_loc(int& inode, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { auto node = (*rr_nodes_)[inode]; RRNodeId node_id = node.id(); rr_graph_builder_->set_node_coordinates(node_id, xlow, ylow, xhigh, yhigh); - rr_graph_builder_->set_node_layer(node_id, layer); + // We set the layer num 0 - If it is specified in the XML, it will be overwritten + rr_graph_builder_->set_node_layer(node_id, 0); rr_graph_builder_->set_node_ptc_num(node_id, ptc); return inode; } @@ -719,6 +726,15 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return rr_graph_->node_ylow(node.id()); } + inline void set_node_loc_layer(int layer_num, int& inode) final { + auto node = (*rr_nodes_)[inode]; + RRNodeId node_id = node.id(); + + + VTR_ASSERT(layer_num >= 0); + rr_graph_builder_->set_node_layer(node_id, layer_num); + } + inline void set_node_loc_side(uxsd::enum_loc_side side, int& inode) final { auto node = 
(*rr_nodes_)[inode]; RRNodeId node_id = node.id(); @@ -1538,28 +1554,58 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { grid_.grid_size(), size); } } - inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) final { - const auto& type = grid_.get_physical_type({x, y, layer}); - int grid_width_offset = grid_.get_width_offset({x, y, layer}); - int grid_height_offset = grid_.get_height_offset({x, y, layer}); + inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y) final { + curr_tmp_block_type_id = block_type_id; + curr_tmp_height_offset = height_offset; + curr_tmp_width_offset = width_offset; + curr_tmp_x = x; + curr_tmp_y = y; - if (type->index != block_type_id) { + return nullptr; + } + inline void finish_grid_locs_grid_loc(void*& /*ctx*/) final { + VTR_ASSERT(curr_tmp_block_type_id >= 0); + VTR_ASSERT(curr_tmp_height_offset >= 0); + VTR_ASSERT(curr_tmp_width_offset >= 0); + VTR_ASSERT(curr_tmp_layer >= 0); + VTR_ASSERT(curr_tmp_x >= 0); + VTR_ASSERT(curr_tmp_y >= 0); + const auto& type = grid_.get_physical_type({curr_tmp_x, curr_tmp_y, curr_tmp_layer}); + int grid_width_offset = grid_.get_width_offset({curr_tmp_x, curr_tmp_y, curr_tmp_layer}); + int grid_height_offset = grid_.get_height_offset({curr_tmp_x, curr_tmp_y, curr_tmp_layer}); + + if (type->index != curr_tmp_block_type_id) { report_error( - "Architecture file does not match RR graph's block_type_id at (%d, %d): arch used ID %d, RR graph used ID %d.", x, y, - (type->index), block_type_id); + "Architecture file does not match RR graph's block_type_id at (layer=%d, x=%d, y=%d): arch used ID %d, RR graph used ID %d.", + curr_tmp_layer, + curr_tmp_x, + curr_tmp_y, + (type->index), + curr_tmp_block_type_id); } - if (grid_width_offset != width_offset) { + if (grid_width_offset != curr_tmp_width_offset) { report_error( - "Architecture file does not match RR graph's 
width_offset at (%d, %d)", x, y); + "Architecture file does not match RR graph's width_offset at (layer=%d, x=%d, y=%d)", + curr_tmp_layer, + curr_tmp_x, + curr_tmp_y); } - if (grid_height_offset != height_offset) { + if (grid_height_offset != curr_tmp_height_offset) { report_error( - "Architecture file does not match RR graph's height_offset at (%d, %d)", x, y); + "Architecture file does not match RR graph's height_offset at (layer=%d, x=%d, y=%d)", + curr_tmp_layer, + curr_tmp_x, + curr_tmp_y); } - return nullptr; + + curr_tmp_block_type_id = -1; + curr_tmp_height_offset = -1; + curr_tmp_width_offset = -1; + curr_tmp_layer = 0; + curr_tmp_x = -1; + curr_tmp_y = -1; } - inline void finish_grid_locs_grid_loc(void*& /*ctx*/) final {} inline void* init_rr_graph_grid(void*& /*ct*/) final { return nullptr; @@ -1594,6 +1640,10 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return grid_.get_grid_locs_grid_loc(n); } + inline void set_grid_loc_layer(int layer_num, void*& /*ctx*/) final { + curr_tmp_layer = layer_num; + } + /** Generated for complex type "rr_graph": * @@ -2028,4 +2078,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { vtr::interned_string empty_; const std::function* report_error_; bool is_flat_; + + // Temporary data to check grid block types + int curr_tmp_block_type_id; + int curr_tmp_height_offset; + int curr_tmp_width_offset; + int curr_tmp_layer; + int curr_tmp_x; + int curr_tmp_y; }; diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index db77f7bc999..9c99d068554 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -2,11 +2,11 @@ # https://github.com/duck2/uxsdcxx # Modify only if your build process doesn't involve regenerating this file. 
# -# Cmdline: uxsdcxx/uxsdcap.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd -# Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd -# md5sum of input file: 8672cb3951993f7e0ea3433a02507672 +# Cmdline: uxsdcxx/uxsdcap.py /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# Input file: /home/amin/Desktop/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# md5sum of input file: 38649d034e0edccbcb511ddb8915cdff -@0xe9a519eb0e454dd4; +@0xd7cc43f5845f4c7f; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -151,10 +151,10 @@ struct BlockTypes { struct GridLoc { blockTypeId @0 :Int32; heightOffset @1 :Int32; - widthOffset @2 :Int32; - x @3 :Int32; - y @4 :Int32; - layer @5 : Int32; + layer @2 :Int32 = 0; + widthOffset @3 :Int32; + x @4 :Int32; + y @5 :Int32; } struct GridLocs { @@ -162,7 +162,7 @@ struct GridLocs { } struct NodeLoc { - layer @0 :Int32; + layer @0 :Int32 = 0; ptc @1 :Int32; side @2 :LocSide; xhigh @3 :Int32; diff --git a/libs/libvtrutil/src/vtr_util.cpp b/libs/libvtrutil/src/vtr_util.cpp index 45ee3035883..2a7a247bde1 100644 --- a/libs/libvtrutil/src/vtr_util.cpp +++ b/libs/libvtrutil/src/vtr_util.cpp @@ -2,6 +2,7 @@ #include #include //For errno #include +#include #include #include @@ -455,28 +456,15 @@ bool file_exists(const char* filename) { return false; } -/* Date:July 17th, 2013 - * Author: Daniel Chen */ /** * @brief Checks the file extension of an file to ensure correct file format. * - * Returns true if format is correct, and false otherwise. - * @note This is probably a fragile check, but at least should - * prevent common problems such as swapping architecture file - * and blif file on the VPR command line. + * Returns true if the extension is correct, and false otherwise. 
*/ -bool check_file_name_extension(const char* file_name, - const char* file_extension) { - const char* str; - int len_extension; - - len_extension = std::strlen(file_extension); - str = std::strstr(file_name, file_extension); - if (str == nullptr || (*(str + len_extension) != '\0')) { - return false; - } - - return true; +bool check_file_name_extension(std::string file_name, + std::string file_extension) { + auto ext = std::filesystem::path(file_name).extension(); + return ext == file_extension; } /** diff --git a/libs/libvtrutil/src/vtr_util.h b/libs/libvtrutil/src/vtr_util.h index 08562d3d092..edcb7ba8598 100644 --- a/libs/libvtrutil/src/vtr_util.h +++ b/libs/libvtrutil/src/vtr_util.h @@ -69,8 +69,7 @@ double atod(const std::string& value); */ int get_file_line_number_of_last_opened_file(); bool file_exists(const char* filename); -bool check_file_name_extension(const char* file_name, - const char* file_extension); +bool check_file_name_extension(std::string file_name, std::string file_extension); extern std::string out_file_prefix; diff --git a/vpr/CMakeLists.txt b/vpr/CMakeLists.txt index 1568ff0547f..1ab5e2861e9 100644 --- a/vpr/CMakeLists.txt +++ b/vpr/CMakeLists.txt @@ -47,6 +47,11 @@ if(${VTR_ENABLE_CAPNPROTO}) add_definitions("-DVTR_ENABLE_CAPNPROTO") endif() +if(${VPR_DEBUG_PARTITION_TREE}) + message(STATUS "VPR: Partition tree debug logs: enabled") + add_definitions("-DVPR_DEBUG_PARTITION_TREE") +endif() + #Create the library add_library(libvpr STATIC ${LIB_HEADERS} diff --git a/vpr/asan.supp b/vpr/asan.supp index c24211d07d9..ff6479bc3bc 100644 --- a/vpr/asan.supp +++ b/vpr/asan.supp @@ -3,3 +3,4 @@ #Suppress known errors from the TBB library #These are some errors from the library that we can't fix and suspect are spurious interceptor_via_lib:libtbb.so + diff --git a/vpr/lsan.supp b/vpr/lsan.supp index a3e291c723c..2a7afafb588 100644 --- a/vpr/lsan.supp +++ b/vpr/lsan.supp @@ -6,3 +6,4 @@ leak:libfontconfig.so #Leaks from TBB library, which #allows VPR 
to run with parallelism leak:libtbb.so +leak:pthread_getattr_np diff --git a/vpr/main.ui b/vpr/main.ui index d4583c6be54..babbccfc3e1 100644 --- a/vpr/main.ui +++ b/vpr/main.ui @@ -1,7 +1,160 @@ - + + + False + + + True + False + vertical + + + + True + False + 1 + + + True + False + vertical + + + True + False + View: + + + False + True + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + + + + + True + False + vertical + + + True + False + Transparency: (0 - 255) + + + False + True + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 0 + + + + + False + True + 1 + + + + + @@ -14,16 +167,16 @@ - False + False True - False + False vertical True - False + False Toggle Block Internals: @@ -35,7 +188,7 @@ True - True + True False @@ -46,7 +199,7 @@ True - False + False Toggle Block Pin Util: @@ -58,9 +211,9 @@ True - False + False 0 - 0 + 0 None All @@ -77,7 +230,7 @@ True - False + False Toggle Placement Macros: @@ -89,9 +242,9 @@ True - False + False 0 - 0 + 0 None Regular @@ -106,7 +259,7 @@ True - False + False Toggle NoC Display @@ -118,7 +271,7 @@ True - False + False 0 None @@ -136,11 +289,11 @@ Block Outline True - True - False + True + False 0 True - True + True False @@ -149,42 +302,42 @@ - - Block Text + + Draw Partitions True - True - False + True + False 0 - True - True + True False True - 9 + 8 - - Draw Partitions + + Block Text True - True - False + True + False 0 - True + True + True False True - 8 + 9 - 5 - 0 + 5 + 0 @@ -194,78 +347,79 @@ test - False + False + True - False + False Save True - True - True - right + True + True + right - 0 - 0 + 0 + 0 Pause True - True - True - right + True + True + right - 0 - 2 + 0 + 2 Debug True - True - True + True + True - 0 - 1 + 0 + 1 Manual Move True - True - True + True + True 0 - True + True - 0 - 3 + 0 + 3 True - False + False Toggle Critical Path: - 0 - 4 + 0 + 4 True - False + False 0 None @@ -276,10 
+430,46 @@ - 0 - 5 + 0 + 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -290,16 +480,16 @@ - False + False True - False + False vertical True - False + False Toggle Nets @@ -311,9 +501,9 @@ True - False + False 0 - 0 + 0 None Cluster Nets @@ -329,7 +519,7 @@ True - False + False Net Transparency (0 - 255) @@ -341,7 +531,7 @@ True - True + True True @@ -353,7 +543,7 @@ True - False + False Net Max Fanout: @@ -365,7 +555,7 @@ True - True + True False @@ -377,16 +567,16 @@ - False + False True - False + False vertical True - False + False Toggle RR: @@ -398,9 +588,9 @@ True - False + False 0 - 0 + 0 None Nodes @@ -419,7 +609,7 @@ True - False + False Toggle Congestion: @@ -431,7 +621,7 @@ True - False + False 0 None @@ -448,7 +638,7 @@ True - False + False Toggle Routing Cong. Cost: @@ -460,7 +650,7 @@ True - False + False 0 None @@ -482,7 +672,7 @@ True - False + False Toggle Routing Bounding Box: @@ -494,7 +684,7 @@ True - True + True False @@ -505,7 +695,7 @@ True - False + False Toggle Routing Expansion Cost: @@ -517,7 +707,7 @@ True - False + False 0 None @@ -538,7 +728,7 @@ True - False + False Toggle Routing Util: @@ -550,7 +740,7 @@ True - False + False 0 None @@ -570,11 +760,11 @@ Clip Routing Util True - True - False + True + False 0 0.30000001192092896 - True + True False @@ -587,64 +777,84 @@ True - False + False VPR: Versatile Place and Route for FPGAs - 800 - 600 - - - + 800 + 600 + True - False - True + False + True + True - False + False end Zoom Fit True - True - True + True + True - 0 - 0 + 0 + 0 Window True - True - True + True + True start - 0 - 1 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + - 4 - 0 + 4 + 0 2 True - False + False 2 - 0 - 5 + 0 + 5 4 @@ -652,26 +862,26 @@ Proceed True - True - True + True + True end - right + right - 4 - 5 + 4 + 5 True - False + False True True - 0 - 2 + 0 + 2 4 3 @@ -680,7 +890,7 @@ SearchType True - False + False 0 Block ID @@ -691,127 +901,178 @@ - 0 - 0 + 0 + 0 Search True - 
True - True + True + True - 3 - 0 + 3 + 0 True - True - search + True + search True - gtk-find - Search... + gtk-find + Search... Completion - 1 - 0 + 1 + 0 2 + True - False - True + False + True True - True - False - True + True + False + True BlockPopover True - False + False Block Settings - 0 - 0 + 0 + 0 True - True - False - True + True + False + True NetPopover True - False + False Net Settings - 1 - 0 + 1 + 0 True - True - False - True + True + False + True RoutingPopover True - False + False Routing Options - 2 - 0 + 2 + 0 True - True - False - True + True + False + True MiscPopover True - False + False Misc. - 3 - 0 + 4 + 0 + + + True + True + False + True + 3Dpopover + + + True + False + View + + + + + 3 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - 0 - 1 + 0 + 1 4 @@ -828,11 +1089,12 @@ - False + False + True - False + False @@ -863,4 +1125,8 @@ + + True + False + diff --git a/vpr/src/base/clustered_netlist.cpp b/vpr/src/base/clustered_netlist.cpp index 7217263dc1a..c446c44a94b 100644 --- a/vpr/src/base/clustered_netlist.cpp +++ b/vpr/src/base/clustered_netlist.cpp @@ -3,13 +3,15 @@ #include "vtr_assert.h" #include "vpr_error.h" +#include + /** * @file * @brief ClusteredNetlist Class Implementation */ ClusteredNetlist::ClusteredNetlist(std::string name, std::string id) - : Netlist(name, id) {} + : Netlist(std::move(name), std::move(id)) {} /* * @@ -28,11 +30,16 @@ t_logical_block_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id) c return block_types_[id]; } -std::vector ClusteredNetlist::blocks_per_type(const t_logical_block_type blk_type) const { +const std::vector& ClusteredNetlist::blocks_per_type(const t_logical_block_type& blk_type) const { + // empty vector is declared static to avoid re-allocation every time the function is called + static std::vector empty_vector; if (blocks_per_type_.count(blk_type.index) == 0) { - std::vector empty_vector; return empty_vector; } + + // the vector is returned as const 
reference to avoid unnecessary copies, + // especially that returned vectors may be very large as they contain + // all clustered blocks with a specific block type return blocks_per_type_.at(blk_type.index); } @@ -132,7 +139,7 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_logi return blk_id; } -ClusterPortId ClusteredNetlist::create_port(const ClusterBlockId blk_id, const std::string name, BitIndex width, PortType type) { +ClusterPortId ClusteredNetlist::create_port(const ClusterBlockId blk_id, const std::string& name, BitIndex width, PortType type) { ClusterPortId port_id = find_port(blk_id, name); if (!port_id) { port_id = Netlist::create_port(blk_id, name, width, type); @@ -163,7 +170,7 @@ ClusterPinId ClusteredNetlist::create_pin(const ClusterPortId port_id, BitIndex return pin_id; } -ClusterNetId ClusteredNetlist::create_net(const std::string name) { +ClusterNetId ClusteredNetlist::create_net(const std::string& name) { //Check if the net has already been created StringId name_id = create_string(name); ClusterNetId net_id = find_net(name_id); @@ -292,9 +299,9 @@ ClusterBlockId ClusteredNetlist::find_block_by_name_fragment(const std::string& ClusterBlockId blk_id = ClusterBlockId::INVALID(); std::regex name_to_match(name_pattern); - for (auto compatible_block_id = cluster_block_candidates.begin(); compatible_block_id != cluster_block_candidates.end(); compatible_block_id++) { - if (std::regex_match(Netlist::block_name(*compatible_block_id), name_to_match)) { - blk_id = *compatible_block_id; + for (auto cluster_block_candidate : cluster_block_candidates) { + if (std::regex_match(Netlist::block_name(cluster_block_candidate), name_to_match)) { + blk_id = cluster_block_candidate; break; } } diff --git a/vpr/src/base/clustered_netlist.h b/vpr/src/base/clustered_netlist.h index f89d521c98a..7a1bd15f28b 100644 --- a/vpr/src/base/clustered_netlist.h +++ b/vpr/src/base/clustered_netlist.h @@ -135,7 +135,7 @@ class ClusteredNetlist : 
public Netlist blocks_per_type(const t_logical_block_type blk_type) const; + const std::vector& blocks_per_type(const t_logical_block_type& blk_type) const; ///@brief Returns the net of the block attached to the specific pin index ClusterNetId block_net(const ClusterBlockId blk_id, const int pin_index) const; @@ -194,7 +194,7 @@ class ClusteredNetlist : public Netlist from_str(std::string str) { ConvertedValue conv_value; if (str == "move_type") - conv_value.set_value(MOVE_TYPE); + conv_value.set_value(e_agent_space::MOVE_TYPE); else if (str == "move_block_type") - conv_value.set_value(MOVE_BLOCK_TYPE); + conv_value.set_value(e_agent_space::MOVE_BLOCK_TYPE); else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_agent_space (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -478,10 +478,10 @@ struct ParsePlaceAgentSpace { ConvertedValue to_str(e_agent_space val) { ConvertedValue conv_value; - if (val == MOVE_TYPE) + if (val == e_agent_space::MOVE_TYPE) conv_value.set_value("move_type"); else { - VTR_ASSERT(val == MOVE_BLOCK_TYPE); + VTR_ASSERT(val == e_agent_space::MOVE_BLOCK_TYPE); conv_value.set_value("move_block_type"); } return conv_value; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index c87d2bec7fc..9969fee4ada 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -217,12 +217,11 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a #ifdef VPR_USE_TBB //Using Thread Building Blocks if (num_workers == 0) { - //Use default concurrency (i.e. maximum conccurency) + //Use default concurrency (i.e. 
maximum concurrency) num_workers = tbb::this_task_arena::max_concurrency(); } VTR_LOG("Using up to %zu parallel worker(s)\n", num_workers); - tbb::global_control c(tbb::global_control::max_allowed_parallelism, num_workers); #else //No parallel execution support if (num_workers != 1) { @@ -237,6 +236,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a vpr_setup->clock_modeling = options->clock_modeling; vpr_setup->two_stage_clock_routing = options->two_stage_clock_routing; vpr_setup->exit_before_pack = options->exit_before_pack; + vpr_setup->num_workers = num_workers; VTR_LOG("\n"); VTR_LOG("Architecture file: %s\n", options->ArchFile.value().c_str()); @@ -366,6 +366,10 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) { return true; } + /* Set this here, because tbb::global_control doesn't control anything once it's out of scope + * (contrary to the name). */ + tbb::global_control c(tbb::global_control::max_allowed_parallelism, vpr_setup.num_workers); + { //Pack bool pack_success = vpr_pack_flow(vpr_setup, arch); diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 9e1c9241346..c7b2fc97e06 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -396,19 +396,6 @@ struct PlacementContext : public Context { * Used for unique identification and consistency checking */ std::string placement_id; - - /** - * @brief Map physical block type to RL-agent block type - * - * RL-agent block types are the physical block types that are used in the netlist (at least one logical block in the netlist maps to). - * As an example: - * Having physical block types (EMPTY, LAB, DSP, IO), - * agent block types would be (LAB,IO) if netlist doesn't contain DSP blocks. 
- * Key : physical (agent) block type index - * Value : agent (physical) block type index - */ - std::unordered_map phys_blk_type_to_agent_blk_type_map; - std::unordered_map agent_blk_type_to_phys_blk_type_map; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 5d03e194f8a..5b6e4c52b2f 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1050,7 +1050,7 @@ enum e_agent_algorithm { * can be based on (block_type, move_type) pair. * */ -enum e_agent_space { +enum class e_agent_space { MOVE_TYPE, MOVE_BLOCK_TYPE }; @@ -1790,6 +1790,7 @@ struct t_vpr_setup { e_clock_modeling clock_modeling; ///draw_rr_node.resize(device_ctx.rr_graph.num_nodes()); + draw_state->draw_layer_display.resize(device_ctx.grid.get_num_layers()); + //By default show the lowest layer only. This is the only die layer for 2D FPGAs + draw_state->draw_layer_display[0].visible = true; + draw_state->arch_info = arch; deselect_all(); /* Set initial colors */ @@ -990,83 +995,83 @@ static void highlight_blocks(double x, double y) { /// determine block /// ezgl::rectangle clb_bbox; - //TODO: Change when graphics supports 3D FPGAs - VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); - int layer_num = 0; - // iterate over grid x - for (int i = 0; i < (int)device_ctx.grid.width(); ++i) { - if (draw_coords->tile_x[i] > x) { - break; // we've gone to far in the x direction - } - // iterate over grid y - for (int j = 0; j < (int)device_ctx.grid.height(); ++j) { - if (draw_coords->tile_y[j] > y) { - break; // we've gone to far in the y direction + //iterate over grid z (layers) first, so we draw from bottom to top die. This makes partial transparency of layers draw properly. 
+ for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + // iterate over grid x + for (int i = 0; i < (int)device_ctx.grid.width(); ++i) { + if (draw_coords->tile_x[i] > x) { + break; // we've gone too far in the x direction } - // iterate over sub_blocks - const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); - for (int k = 0; k < type->capacity; ++k) { - // TODO: Change when graphics supports 3D - clb_index = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); - if (clb_index != EMPTY_BLOCK_ID) { - clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, - cluster_ctx.clb_nlist.block_type(clb_index)); - if (clb_bbox.contains({x, y})) { - break; - } else { - clb_index = EMPTY_BLOCK_ID; + // iterate over grid y + for (int j = 0; j < (int)device_ctx.grid.height(); ++j) { + if (draw_coords->tile_y[j] > y) { + break; // we've gone too far in the y direction + } + // iterate over sub_blocks + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + for (int k = 0; k < type->capacity; ++k) { + clb_index = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + if (clb_index != EMPTY_BLOCK_ID) { + clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, + cluster_ctx.clb_nlist.block_type(clb_index)); + if (clb_bbox.contains({x, y})) { + break; + } else { + clb_index = EMPTY_BLOCK_ID; + } } } + if (clb_index != EMPTY_BLOCK_ID) { + break; // we've found something + } } if (clb_index != EMPTY_BLOCK_ID) { break; // we've found something } } - if (clb_index != EMPTY_BLOCK_ID) { - break; // we've found something - } - } - if (clb_index == EMPTY_BLOCK_ID || clb_index == ClusterBlockId::INVALID()) { - //Nothing found - return; - } + if (clb_index == EMPTY_BLOCK_ID || clb_index == ClusterBlockId::INVALID()) { + //Nothing found + return; + } - VTR_ASSERT(clb_index != EMPTY_BLOCK_ID); + VTR_ASSERT(clb_index != EMPTY_BLOCK_ID); - // note: this will clear the selected sub-block if 
show_blk_internal is 0, - // or if it doesn't find anything - ezgl::point2d point_in_clb = ezgl::point2d(x, y) - clb_bbox.bottom_left(); - highlight_sub_block(point_in_clb, clb_index, - cluster_ctx.clb_nlist.block_pb(clb_index)); + // note: this will clear the selected sub-block if show_blk_internal is 0, + // or if it doesn't find anything + ezgl::point2d point_in_clb = ezgl::point2d(x, y) - clb_bbox.bottom_left(); + highlight_sub_block(point_in_clb, clb_index, + cluster_ctx.clb_nlist.block_pb(clb_index)); - if (get_selected_sub_block_info().has_selection()) { - t_pb* selected_subblock = get_selected_sub_block_info().get_selected_pb(); - sprintf(msg, "sub-block %s (a \"%s\") selected", - selected_subblock->name, - selected_subblock->pb_graph_node->pb_type->name); - } else { - /* Highlight block and fan-in/fan-outs. */ - draw_highlight_blocks_color(cluster_ctx.clb_nlist.block_type(clb_index), - clb_index); - sprintf(msg, "Block #%zu (%s) at (%d, %d) selected.", size_t(clb_index), - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - place_ctx.block_locs[clb_index].loc.x, - place_ctx.block_locs[clb_index].loc.y); - } + if (get_selected_sub_block_info().has_selection()) { + t_pb* selected_subblock = get_selected_sub_block_info().get_selected_pb(); + sprintf(msg, "sub-block %s (a \"%s\") selected", + selected_subblock->name, + selected_subblock->pb_graph_node->pb_type->name); + } else { + /* Highlight block and fan-in/fan-outs. */ + draw_highlight_blocks_color(cluster_ctx.clb_nlist.block_type(clb_index), + clb_index); + sprintf(msg, "Block #%zu (%s) at (%d, %d) selected.", size_t(clb_index), + cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + place_ctx.block_locs[clb_index].loc.x, + place_ctx.block_locs[clb_index].loc.y); + } - //If manual moves is activated, then user can select block from the grid. 
- t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->manual_moves_state.manual_move_enabled) { - draw_state->manual_moves_state.user_highlighted_block = true; - if (!draw_state->manual_moves_state.manual_move_window_is_open) { - draw_manual_moves_window(std::to_string(size_t(clb_index))); + //If manual moves is activated, then user can select block from the grid. + t_draw_state* draw_state = get_draw_state_vars(); + if (draw_state->manual_moves_state.manual_move_enabled) { + draw_state->manual_moves_state.user_highlighted_block = true; + if (!draw_state->manual_moves_state.manual_move_window_is_open) { + draw_manual_moves_window(std::to_string(size_t(clb_index))); + } } - } - application.update_message(msg); - application.refresh_drawing(); + application.update_message(msg); + application.refresh_drawing(); + } } + static void setup_default_ezgl_callbacks(ezgl::application* app) { // Connect press_proceed function to the Proceed button GObject* proceed_button = app->get_object("ProceedButton"); @@ -1370,25 +1375,15 @@ void clear_colored_locations() { draw_state->colored_locations.clear(); } -// This routine takes in a (x,y) location. 
-// If the input loc is marked in colored_locations vector, the function will return true and the correspnding color is sent back in loc_color -// otherwise, the function returns false (the location isn't among the highlighted locations) -bool highlight_loc_with_specific_color(int x, int y, ezgl::color& loc_color) { +bool highlight_loc_with_specific_color(t_pl_loc curr_loc, ezgl::color& loc_color) { t_draw_state* draw_state = get_draw_state_vars(); - //define a (x,y) location variable - t_pl_loc curr_loc; - curr_loc.x = x; - curr_loc.y = y; - //TODO: Graphic currently doesn't support 3D FPGAs - curr_loc.layer = 0; - //search for the current location in the vector of colored locations auto it = std::find_if(draw_state->colored_locations.begin(), draw_state->colored_locations.end(), [&curr_loc](const std::pair& vec_element) { return (vec_element.first.x == curr_loc.x - && vec_element.first.y == curr_loc.y); + && vec_element.first.y == curr_loc.y && vec_element.first.layer == curr_loc.layer); }); if (it != draw_state->colored_locations.end()) { @@ -1443,4 +1438,32 @@ size_t get_max_fanout() { return max; } +bool rgb_is_same(ezgl::color color1, ezgl::color color2) { + color1.alpha = 255; + color2.alpha = 255; + return (color1 == color2); +} +t_draw_layer_display get_element_visibility_and_transparency(int src_layer, int sink_layer) { + t_draw_layer_display element_visibility; + t_draw_state* draw_state = get_draw_state_vars(); + + element_visibility.visible = true; + bool cross_layer_enabled = draw_state->cross_layer_display.visible; + + //To only show primitive nets that are connected to currently active layers on the screen + if (!draw_state->draw_layer_display[sink_layer].visible || (!cross_layer_enabled && src_layer != sink_layer)) { + element_visibility.visible = false; /* Don't Draw */ + } + + if (src_layer != sink_layer) { + //assign transparency from cross layer option if connection is between different layers + element_visibility.alpha = 
draw_state->cross_layer_display.alpha; + } else { + //otherwise assign transparency of current layer + element_visibility.alpha = draw_state->draw_layer_display[src_layer].alpha; + } + + return element_visibility; +} + #endif /* NO_GRAPHICS */ diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index 145f05800aa..7e799eb8a97 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -116,10 +116,17 @@ void set_draw_loc_color(t_pl_loc, ezgl::color); // clear the colored_locations vector void clear_colored_locations(); -// This routine takes in a (x,y) location. -// If the input loc is marked in colored_locations vector, the function will return true and the correspnding color is sent back in loc_color -// otherwise, the function returns false (the location isn't among the highlighted locations) -bool highlight_loc_with_specific_color(int x, int y, ezgl::color& loc_color); +/** + * @brief If the input loc is marked in colored_locations vector, the function will return true and the corresponding color is sent back in loc_color + * otherwise, the function returns false (the location isn't among the highlighted locations) + * + * @param curr_loc The current location that is being checked for whether it must be highlighted or not + * @param loc_color The corresponding color that is to be used to highlight the block + * + * @return Returns true or false depending on whether the block at the specified (x,y,layer) location needs to be highlighted by a specific color. + * The corresponding color is returned by reference. + */ +bool highlight_loc_with_specific_color(t_pl_loc curr_loc, ezgl::color& loc_color); /* Because the list of possible block type colours is finite, we wrap around possible colours if there are more * block types than colour choices. 
This ensures we support any number of types, although the colours may repeat.*/ @@ -132,6 +139,22 @@ void toggle_window_mode(GtkWidget* /*widget*/, ezgl::application* /*app*/); size_t get_max_fanout(); +/** + * @brief Takes in two colors and compares rgb values, ignoring transparency/alpha + * Sets both transparencies to opaque and then compares the colors. + */ +bool rgb_is_same(ezgl::color color1, ezgl::color color2); + +/** + * @brief Takes in the layer number of the src and sink of an element(flyline, rr_node connections, etc...) and returns a t_draw_layer_display object holding the + * information of the visibility of the element as well as the transparency based on the setting set by the user from the view menu in the UI. + * + * @param src_layer + * @param sink_layer + * @return Returns whether the element should be drawn (true or false) and the transparency factor (0 - transparent ,255 - opaque) as a t_draw_layer_display object + */ +t_draw_layer_display get_element_visibility_and_transparency(int src_layer, int sink_layer); + #endif /* NO_GRAPHICS */ #endif /* DRAW_H */ diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 0eb49a02034..c1b8715c1c7 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -108,103 +108,112 @@ void drawplace(ezgl::renderer* g) { ClusterBlockId bnum; int num_sub_tiles; - //TODO: Change when graphics supports 3D FPGAs - VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); - int layer_num = 0; + int total_num_layers = device_ctx.grid.get_num_layers(); g->set_line_width(0); - for (int i = 0; i < (int)device_ctx.grid.width(); i++) { - for (int j = 0; j < (int)device_ctx.grid.height(); j++) { - /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); - int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); - int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); - - if 
(width_offset > 0 - || height_offset > 0) - continue; - - num_sub_tiles = type->capacity; - /* Don't draw if tile capacity is zero. eg-> corners. */ - if (num_sub_tiles == 0) { - continue; - } - - for (int k = 0; k < num_sub_tiles; ++k) { - /* Look at the tile at start of large block */ - //TODO: Change when graphics supports 3D - bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); - /* Fill background for the clb. Do not fill if "show_blk_internal" - * is toggled. - */ - if (bnum == INVALID_BLOCK_ID) - continue; - - //Determine the block color and logical type - ezgl::color block_color; - t_logical_block_type_ptr logical_block_type = nullptr; - - //flag whether the current location is highlighted with a special color or not - bool current_loc_is_highlighted = false; - - if (placer_breakpoint_reached()) - current_loc_is_highlighted = highlight_loc_with_specific_color(int(i), int(j), - block_color); - - // No color specified at this location; use the block color. - if (current_loc_is_highlighted == false) { - if (bnum != EMPTY_BLOCK_ID) { - block_color = draw_state->block_color(bnum); - } else { - block_color = get_block_type_color(type); - block_color = lighten_color(block_color, - EMPTY_BLOCK_LIGHTEN_FACTOR); + for (int layer_num = 0; layer_num < total_num_layers; layer_num++) { + if (draw_state->draw_layer_display[layer_num].visible) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + /* Only the first block of a group should control drawing */ + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); + + //The transparency level for the current layer being drawn (0-255) + // 0 - opaque, 255 - transparent + int transparency_factor = draw_state->draw_layer_display[layer_num].alpha; + + if (width_offset > 0 + || 
height_offset > 0) + continue; + + num_sub_tiles = type->capacity; + /* Don't draw if tile capacity is zero. eg-> corners. */ + if (num_sub_tiles == 0) { + continue; } - } - - logical_block_type = pick_logical_type(type); - g->set_color(block_color); - /* Get coords of current sub_tile */ - ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(layer_num, - i, - j, - k, - logical_block_type); - ezgl::point2d center = abs_clb_bbox.center(); - - g->fill_rectangle(abs_clb_bbox); - - g->set_color(ezgl::BLACK); - - g->set_line_dash( - (EMPTY_BLOCK_ID == bnum) ? ezgl::line_dash::asymmetric_5_3 : ezgl::line_dash::none); - if (draw_state->draw_block_outlines) { - g->draw_rectangle(abs_clb_bbox); - } - - if (draw_state->draw_block_text) { - /* Draw text if the space has parts of the netlist */ - if (bnum != EMPTY_BLOCK_ID && bnum != INVALID_BLOCK_ID) { - std::string name = cluster_ctx.clb_nlist.block_name( - bnum) - + vtr::string_fmt(" (#%zu)", size_t(bnum)); - - g->draw_text(center, name.c_str(), abs_clb_bbox.width(), - abs_clb_bbox.height()); - } - /* Draw text for block type so that user knows what block */ - if (width_offset == 0 - && height_offset == 0) { - std::string block_type_loc = type->name; - block_type_loc += vtr::string_fmt(" (%d,%d)", i, j); - - g->draw_text( - center - - ezgl::point2d(0, - abs_clb_bbox.height() / 4), - block_type_loc.c_str(), abs_clb_bbox.width(), - abs_clb_bbox.height()); + for (int k = 0; k < num_sub_tiles; ++k) { + /* Look at the tile at start of large block */ + bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + /* Fill background for the clb. Do not fill if "show_blk_internal" + * is toggled. 
+ */ + if (bnum == INVALID_BLOCK_ID) + continue; + + //Determine the block color and logical type + ezgl::color block_color; + t_logical_block_type_ptr logical_block_type = nullptr; + + //flag whether the current location is highlighted with a special color or not + bool current_loc_is_highlighted = false; + + if (placer_breakpoint_reached()) { + t_pl_loc curr_loc; + curr_loc.x = i; + curr_loc.y = j; + curr_loc.layer = layer_num; + current_loc_is_highlighted = highlight_loc_with_specific_color(curr_loc, + block_color); + } + // No color specified at this location; use the block color. + if (current_loc_is_highlighted == false) { + if (bnum != EMPTY_BLOCK_ID) { + block_color = draw_state->block_color(bnum); + } else { + block_color = get_block_type_color(type); + block_color = lighten_color(block_color, + EMPTY_BLOCK_LIGHTEN_FACTOR); + } + } + + logical_block_type = pick_logical_type(type); + g->set_color(block_color, transparency_factor); + + /* Get coords of current sub_tile */ + ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(layer_num, + i, + j, + k, + logical_block_type); + ezgl::point2d center = abs_clb_bbox.center(); + + g->fill_rectangle(abs_clb_bbox); + + g->set_color(ezgl::BLACK, transparency_factor); + + g->set_line_dash( + (EMPTY_BLOCK_ID == bnum) ? 
ezgl::line_dash::asymmetric_5_3 : ezgl::line_dash::none); + if (draw_state->draw_block_outlines) { + g->draw_rectangle(abs_clb_bbox); + } + + if (draw_state->draw_block_text) { + /* Draw text if the space has parts of the netlist */ + if (bnum != EMPTY_BLOCK_ID && bnum != INVALID_BLOCK_ID) { + std::string name = cluster_ctx.clb_nlist.block_name( + bnum) + + vtr::string_fmt(" (#%zu)", size_t(bnum)); + + g->draw_text(center, name.c_str(), abs_clb_bbox.width(), + abs_clb_bbox.height()); + } + /* Draw text for block type so that user knows what block */ + if (width_offset == 0 + && height_offset == 0) { + std::string block_type_loc = type->name; + block_type_loc += vtr::string_fmt(" (%d,%d)", i, j); + + g->draw_text( + center + - ezgl::point2d(0, + abs_clb_bbox.height() / 4), + block_type_loc.c_str(), abs_clb_bbox.width(), + abs_clb_bbox.height()); + } + } } } } @@ -221,12 +230,17 @@ void drawnets(ezgl::renderer* g) { ClusterBlockId b1, b2; auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + float transparency_factor; float NET_ALPHA = draw_state->net_alpha; g->set_line_dash(ezgl::line_dash::none); g->set_line_width(0); + int driver_block_layer_num = -1; + int sink_block_layer_num = -1; + /* Draw the net as a star from the source to each sink. Draw from centers of * * blocks (or sub blocks in the case of IOs). 
*/ @@ -234,17 +248,35 @@ void drawnets(ezgl::renderer* g) { if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) continue; /* Don't draw */ - g->set_color(draw_state->net_color[net_id], - draw_state->net_color[net_id].alpha * NET_ALPHA); b1 = cluster_ctx.clb_nlist.net_driver_block(net_id); - ezgl::point2d driver_center = draw_coords->get_absolute_clb_bbox(b1, - cluster_ctx.clb_nlist.block_type(b1)) - .center(); + + //The layer of the net driver block + driver_block_layer_num = place_ctx.block_locs[b1].loc.layer; + + //To only show nets that are connected to currently active layers on the screen + if (draw_state->draw_layer_display[driver_block_layer_num].visible == false) { + continue; /* Don't draw */ + } + + ezgl::point2d driver_center = draw_coords->get_absolute_clb_bbox(b1, cluster_ctx.clb_nlist.block_type(b1)).center(); for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { b2 = cluster_ctx.clb_nlist.pin_block(pin_id); - ezgl::point2d sink_center = draw_coords->get_absolute_clb_bbox(b2, - cluster_ctx.clb_nlist.block_type(b2)) - .center(); + + //the layer of the pin block (net sinks) + sink_block_layer_num = place_ctx.block_locs[b2].loc.layer; + + t_draw_layer_display element_visibility = get_element_visibility_and_transparency(driver_block_layer_num, sink_block_layer_num); + + if (!element_visibility.visible) { + continue; /* Don't Draw */ + } + transparency_factor = element_visibility.alpha; + + //Take the higher of the 2 transparency values that the user can select from the UI + // Compare the current cross layer transparency to the overall Net transparency set by the user. + g->set_color(draw_state->net_color[net_id], fmin(transparency_factor, draw_state->net_color[net_id].alpha * NET_ALPHA)); + + ezgl::point2d sink_center = draw_coords->get_absolute_clb_bbox(b2, cluster_ctx.clb_nlist.block_type(b2)).center(); g->draw_line(driver_center, sink_center); /* Uncomment to draw a chain instead of a star. 
*/ /* driver_center = sink_center; */ @@ -328,6 +360,10 @@ void draw_congestion(ezgl::renderer* g) { //Draw each congested node for (RRNodeId inode : congested_rr_nodes) { + int layer_num = rr_graph.node_layer(inode); + int transparency_factor = get_rr_node_transparency(inode); + if (!draw_state->draw_layer_display[layer_num].visible) + continue; short occ = route_ctx.rr_node_route_inf[inode].occ(); short capacity = rr_graph.node_capacity(inode); @@ -337,6 +373,7 @@ void draw_congestion(ezgl::renderer* g) { VTR_ASSERT(node_congested); ezgl::color color = to_ezgl_color(cmap->color(congestion_ratio)); + color.alpha = transparency_factor; switch (rr_graph.node_type(inode)) { case CHANX: //fallthrough @@ -616,11 +653,17 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::ren auto rr_type = rr_graph.node_type(inode); RRNodeId prev_node = rr_nodes_to_draw[i - 1]; + RRNodeId prev_rr_node = prev_node; auto prev_type = rr_graph.node_type(RRNodeId(prev_node)); auto iedge = find_edge(prev_node, inode); auto switch_type = rr_graph.edge_switch(RRNodeId(prev_node), iedge); + //Don't draw node if the layer of the node is not set to visible on screen + if (!draw_state->draw_layer_display[rr_graph.node_layer(inode)].visible) { + continue; + } + switch (rr_type) { case OPIN: { draw_rr_pin(inode, draw_state->draw_rr_node[inode].color, g); @@ -628,10 +671,12 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::ren } case IPIN: { draw_rr_pin(inode, draw_state->draw_rr_node[inode].color, g); - if (rr_graph.node_type(prev_node) == OPIN) { - draw_pin_to_pin(prev_node, inode, g); - } else { - draw_pin_to_chan_edge(inode, prev_node, g); + if (is_edge_valid_to_draw(inode, prev_rr_node)) { + if (rr_graph.node_type(prev_node) == OPIN) { + draw_pin_to_pin(prev_node, inode, g); + } else { + draw_pin_to_chan_edge(inode, prev_node, g); + } } break; } @@ -640,24 +685,25 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::ren 
chanx_track[rr_graph.node_xlow(inode)][rr_graph.node_ylow(inode)]++; draw_rr_chan(inode, draw_state->draw_rr_node[inode].color, g); - - switch (prev_type) { - case CHANX: { - draw_chanx_to_chanx_edge(prev_node, inode, switch_type, g); - break; - } - case CHANY: { - draw_chanx_to_chany_edge(inode, prev_node, FROM_Y_TO_X, switch_type, g); - break; - } - case OPIN: { - draw_pin_to_chan_edge(prev_node, inode, g); - break; - } - default: { - VPR_ERROR(VPR_ERROR_OTHER, - "Unexpected connection from an rr_node of type %d to one of type %d.\n", - prev_type, rr_type); + if (is_edge_valid_to_draw(inode, prev_rr_node)) { + switch (prev_type) { + case CHANX: { + draw_chanx_to_chanx_edge(prev_node, inode, switch_type, g); + break; + } + case CHANY: { + draw_chanx_to_chany_edge(inode, prev_node, FROM_Y_TO_X, switch_type, g); + break; + } + case OPIN: { + draw_pin_to_chan_edge(prev_node, inode, g); + break; + } + default: { + VPR_ERROR(VPR_ERROR_OTHER, + "Unexpected connection from an rr_node of type %d to one of type %d.\n", + prev_type, rr_type); + } } } @@ -669,26 +715,28 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::ren draw_rr_chan(inode, draw_state->draw_rr_node[inode].color, g); - switch (prev_type) { - case CHANX: { - draw_chanx_to_chany_edge(prev_node, inode, - FROM_X_TO_Y, switch_type, g); - break; - } - case CHANY: { - draw_chany_to_chany_edge(RRNodeId(prev_node), RRNodeId(inode), - switch_type, g); - break; - } - case OPIN: { - draw_pin_to_chan_edge(prev_node, inode, g); - - break; - } - default: { - VPR_ERROR(VPR_ERROR_OTHER, - "Unexpected connection from an rr_node of type %d to one of type %d.\n", - prev_type, rr_type); + if (is_edge_valid_to_draw(inode, prev_rr_node)) { + switch (prev_type) { + case CHANX: { + draw_chanx_to_chany_edge(prev_node, inode, + FROM_X_TO_Y, switch_type, g); + break; + } + case CHANY: { + draw_chany_to_chany_edge(RRNodeId(prev_node), RRNodeId(inode), + switch_type, g); + break; + } + case OPIN: { + 
draw_pin_to_chan_edge(prev_node, inode, g); + + break; + } + default: { + VPR_ERROR(VPR_ERROR_OTHER, + "Unexpected connection from an rr_node of type %d to one of type %d.\n", + prev_type, rr_type); + } } } @@ -701,6 +749,28 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::ren } } +/* Helper function that checks whether the edges between the current and previous nodes can be drawn + * based on whether the cross-layer connections option is enabled and whether the layer on which the + * nodes are located are enabled. + */ +bool is_edge_valid_to_draw(RRNodeId current_node, RRNodeId prev_node) { + t_draw_state* draw_state = get_draw_state_vars(); + auto& rr_graph = g_vpr_ctx.device().rr_graph; + + int current_node_layer = rr_graph.node_layer(current_node); + int prev_node_layer = rr_graph.node_layer(prev_node); + + if (current_node_layer != prev_node_layer) { + if (draw_state->cross_layer_display.visible && draw_state->draw_layer_display[current_node_layer].visible && draw_state->draw_layer_display[prev_node_layer].visible) { + return true; //if both layers are enabled and cross layer connections are enabled + } else { + return false; //if cross layer connections are disabled or if either the current or prev node's layers are disabled + } + } else { + return draw_state->draw_layer_display[current_node_layer].visible; //if both nodes are from the same layer + } +} + /* Draws any placement macros (e.g. carry chains, which require specific relative placements * between some blocks) if the Placement Macros (in the GUI) is seelected. 
*/ @@ -774,8 +844,8 @@ void draw_routing_util(ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); - auto chanx_usage = calculate_routing_usage(CHANX, draw_state->is_flat); - auto chany_usage = calculate_routing_usage(CHANY, draw_state->is_flat); + auto chanx_usage = calculate_routing_usage(CHANX, draw_state->is_flat, false); + auto chany_usage = calculate_routing_usage(CHANY, draw_state->is_flat, false); auto chanx_avail = calculate_routing_avail(CHANX); auto chany_avail = calculate_routing_avail(CHANY); @@ -965,6 +1035,7 @@ void draw_crit_path(ezgl::renderer* g) { for (tatum::TimingPathElem elem : path.data_arrival_path().elements()) { tatum::NodeId node = elem.node(); float arr_time = elem.tag().time(); + if (prev_node) { //We draw each 'edge' in a different color, this allows users to identify the stages and //any routing which corresponds to the edge @@ -974,19 +1045,42 @@ void draw_crit_path(ezgl::renderer* g) { % kelly_max_contrast_colors.size()]; float delay = arr_time - prev_arr_time; + + int src_block_layer = get_timing_path_node_layer_num(node); + int sink_block_layer = get_timing_path_node_layer_num(prev_node); + if (draw_state->show_crit_path == DRAW_CRIT_PATH_FLYLINES || draw_state->show_crit_path == DRAW_CRIT_PATH_FLYLINES_DELAYS) { - g->set_color(color); - g->set_line_dash(ezgl::line_dash::none); - g->set_line_width(4); - draw_flyline_timing_edge(tnode_draw_coord(prev_node), - tnode_draw_coord(node), delay, g); + // FLylines for critical path are drawn based on the layer visibility of the source and sink + if (is_flyline_valid_to_draw(src_block_layer, sink_block_layer)) { + g->set_color(color); + g->set_line_dash(ezgl::line_dash::none); + g->set_line_width(4); + draw_flyline_timing_edge(tnode_draw_coord(prev_node), + tnode_draw_coord(node), delay, g); + g->set_line_width(0); + } } else { VTR_ASSERT(draw_state->show_crit_path != DRAW_NO_CRIT_PATH); + // Draws critical path shown by both 
flylines and routed net connections. + //Draw the routed version of the timing edge - draw_routed_timing_edge(prev_node, node, delay, color, g); + draw_routed_timing_edge_connection(prev_node, node, color, g); + + // FLylines for critical path are drawn based on the layer visibility of the source and sink + if (is_flyline_valid_to_draw(src_block_layer, sink_block_layer)) { + g->set_line_dash(ezgl::line_dash::asymmetric_5_3); + g->set_line_width(3); + g->set_color(color); + + draw_flyline_timing_edge((ezgl::point2d)tnode_draw_coord(prev_node), + (ezgl::point2d)tnode_draw_coord(node), (float)delay, + (ezgl::renderer*)g); + g->set_line_dash(ezgl::line_dash::none); + g->set_line_width(0); + } } } prev_node = node; @@ -994,6 +1088,29 @@ void draw_crit_path(ezgl::renderer* g) { } } +int get_timing_path_node_layer_num(tatum::NodeId node) { + auto& place_ctx = g_vpr_ctx.placement(); + auto& atom_ctx = g_vpr_ctx.atom(); + + AtomPinId atom_pin = atom_ctx.lookup.tnode_atom_pin(node); + AtomBlockId atom_block = atom_ctx.nlist.pin_block(atom_pin); + ClusterBlockId clb_block = atom_ctx.lookup.atom_clb(atom_block); + return place_ctx.block_locs[clb_block].loc.layer; +} + +bool is_flyline_valid_to_draw(int src_layer, int sink_layer) { + t_draw_state* draw_state = get_draw_state_vars(); + + if (!draw_state->draw_layer_display[src_layer].visible || !draw_state->draw_layer_display[sink_layer].visible) { + return false; /* Don't Draw if either nodes are not on a currently visible layer in the UI*/ + } + if (src_layer != sink_layer && !draw_state->cross_layer_display.visible) { + return false; /* Don't Draw if cross layer option is off and nodes are on different layers*/ + } + + return true; +} + //Draws critical path shown as flylines. 
void draw_flyline_timing_edge(ezgl::point2d start, ezgl::point2d end, float incr_delay, ezgl::renderer* g) { g->draw_line(start, end); @@ -1065,26 +1182,6 @@ void draw_flyline_timing_edge(ezgl::point2d start, ezgl::point2d end, float incr } } -//Draws critical path shown by both flylines and routed net connections. -void draw_routed_timing_edge(tatum::NodeId start_tnode, - tatum::NodeId end_tnode, - float incr_delay, - ezgl::color color, - ezgl::renderer* g) { - draw_routed_timing_edge_connection(start_tnode, end_tnode, color, g); - - g->set_line_dash(ezgl::line_dash::asymmetric_5_3); - g->set_line_width(3); - g->set_color(color); - - draw_flyline_timing_edge((ezgl::point2d)tnode_draw_coord(start_tnode), - (ezgl::point2d)tnode_draw_coord(end_tnode), (float)incr_delay, - (ezgl::renderer*)g); - - g->set_line_width(0); - g->set_line_dash(ezgl::line_dash::none); -} - //Collect all the drawing locations associated with the timing edge between start and end void draw_routed_timing_edge_connection(tatum::NodeId src_tnode, tatum::NodeId sink_tnode, @@ -1151,6 +1248,7 @@ void draw_routed_timing_edge_connection(tatum::NodeId src_tnode, draw_state->draw_rr_node[inode].color = color; } + //draw_partial_route() takes care of layer visibility and cross-layer settings draw_partial_route(routed_rr_nodes, (ezgl::renderer*)g); } else { //Connection entirely within the CLB, we don't draw the internal routing so treat it as a fly-line diff --git a/vpr/src/draw/draw_basic.h b/vpr/src/draw/draw_basic.h index 4b2fad749c2..73082b158ba 100644 --- a/vpr/src/draw/draw_basic.h +++ b/vpr/src/draw/draw_basic.h @@ -41,7 +41,8 @@ # include "ezgl/graphics.hpp" /* Draws the blocks placed on the proper clbs. Occupied blocks are darker colours * - * while empty ones are lighter colours and have a dashed border. */ + * while empty ones are lighter colours and have a dashed border. 
* + * Blocks are drawn in layer order (so that semi-transparent blocks/grids render well)*/ void drawplace(ezgl::renderer* g); /* This routine draws the nets on the placement. The nets have not * @@ -72,6 +73,25 @@ void draw_routed_net(ParentNetId net, ezgl::renderer* g); void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer* g); +/** + * @brief Returns the layer number of a timing path node + * @param node + * @return layer number the node is situated on. + */ +int get_timing_path_node_layer_num(tatum::NodeId node); + +/** + * @brief Returns true if both the current_node and prev_node are on the same layer and it is visible, + * or they're on different layers that are both visible and cross-layer connections are visible. + * Otherwise returns false. + * + * @param current_node + * @param prev_node + * + * @return + */ +bool is_edge_valid_to_draw(RRNodeId current_node, RRNodeId prev_node); + /* Draws a heat map of routing wire utilization (i.e. fraction of wires used in each channel) * when a routing is shown on-screen and Routing Util (on the GUI) is selected. * Lighter colours (e.g. yellow) correspond to highly utilized @@ -86,18 +106,20 @@ void draw_routing_util(ezgl::renderer* g); */ void draw_crit_path(ezgl::renderer* g); +/** + * @brief Checks whether a flyline should be drawn or not based on the layer control settings in the UI + * @param src_layer + * @param sink_layer + * + * @return If the source and sink are on the same active(visible) layer - returns true + * If the source and sink are on different active layers & Cross-layer connections is toggled on - returns true + * Otherwise returns false + */ +bool is_flyline_valid_to_draw(int src_layer, int sink_layer); + /* Draws critical path shown as flylines. 
Takes in start and end coordinates, time delay, & renderer.*/ void draw_flyline_timing_edge(ezgl::point2d start, ezgl::point2d end, float incr_delay, ezgl::renderer* g); -/* Draws critical path shown by both flylines and routed net connections. Takes in start and end nodes, - * time delay, colour, & renderer. - */ -void draw_routed_timing_edge(tatum::NodeId start_tnode, - tatum::NodeId end_tnode, - float incr_delay, - ezgl::color color, - ezgl::renderer* g); - /* Collects all the drawing locations associated with the timing edge between start and end. * Only traces interconnect edges in detail, and treats all others as flylines. */ diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp index e4e39588b8c..746af57d811 100644 --- a/vpr/src/draw/draw_mux.cpp +++ b/vpr/src/draw/draw_mux.cpp @@ -124,13 +124,12 @@ ezgl::rectangle draw_mux(ezgl::point2d origin, e_side orientation, float height, } /* Draws a mux with width = height * 0.4 and scale (slope of the muxes sides) = 0.6, labelled with its size. - * Takes in point of origin, orientation, height, mux size and renderer. 
*/ -void draw_mux_with_size(ezgl::point2d origin, e_side orientation, float height, int size, ezgl::renderer* g) { - g->set_color(ezgl::YELLOW); +void draw_mux_with_size(ezgl::point2d origin, e_side orientation, float height, int size, int transparency_factor, ezgl::renderer* g) { + g->set_color(ezgl::YELLOW, transparency_factor); auto bounds = draw_mux(origin, orientation, height, g); - g->set_color(ezgl::BLACK); + g->set_color(ezgl::BLACK, transparency_factor); g->draw_text(bounds.center(), std::to_string(size), bounds.width(), bounds.height()); } diff --git a/vpr/src/draw/draw_mux.h b/vpr/src/draw/draw_mux.h index eb1cfb201d7..dbfcf2b4d30 100644 --- a/vpr/src/draw/draw_mux.h +++ b/vpr/src/draw/draw_mux.h @@ -37,10 +37,13 @@ # include "ezgl/application.hpp" # include "ezgl/graphics.hpp" -/* Draws a mux with width = height * 0.4 and scale (slope of the muxes sides) = 0.6, labelled with its size. +/** + * @brief Draws a mux with width = height * 0.4 and scale (slope of the muxes sides) = 0.6, labelled with its size. * Takes in point of origin, orientation, height, mux size and renderer. + * Also takes in transparency factor, based on the transparency of the layer the mux is to be drawn on + * (0 is opaque and 255 is transparent). */ -void draw_mux_with_size(ezgl::point2d origin, e_side orientation, float height, int size, ezgl::renderer* g); +void draw_mux_with_size(ezgl::point2d origin, e_side orientation, float height, int size, int transparency_factor, ezgl::renderer* g); /* Draws a mux with width = height * 0.4 and scale (slope of the muxes sides) = 0.6. * Takes in point of origin, orientation, height and renderer. 
diff --git a/vpr/src/draw/draw_noc.cpp b/vpr/src/draw/draw_noc.cpp index 771be541034..f664562f88a 100644 --- a/vpr/src/draw/draw_noc.cpp +++ b/vpr/src/draw/draw_noc.cpp @@ -186,17 +186,26 @@ ezgl::rectangle get_noc_connection_marker_bbox(const t_logical_block_type_ptr no */ void draw_noc_connection_marker(ezgl::renderer* g, const vtr::vector& router_list, ezgl::rectangle connection_marker_bbox) { t_draw_coords* draw_coords = get_draw_coords_vars(); - - //set the color of the marker - g->set_color(ezgl::BLACK); + t_draw_state* draw_state = get_draw_state_vars(); int router_grid_position_x = 0; int router_grid_position_y = 0; + int router_grid_position_layer = 0; ezgl::rectangle updated_connection_marker_bbox; // go through the routers and create the connection marker for (auto router = router_list.begin(); router != router_list.end(); router++) { + router_grid_position_layer = router->get_router_layer_position(); + + t_draw_layer_display marker_box_visibility = draw_state->draw_layer_display[router_grid_position_layer]; + if (!marker_box_visibility.visible) { + continue; /* Don't Draw marker box if not on visible layer*/ + } + + //set the color of the marker with the layer transparency + g->set_color(ezgl::BLACK, marker_box_visibility.alpha); + router_grid_position_x = router->get_router_grid_position_x(); router_grid_position_y = router->get_router_grid_position_y(); @@ -264,12 +273,21 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic source_router = noc_link_list[link_id].get_source_router(); sink_router = noc_link_list[link_id].get_sink_router(); - // calculate the grid positions of the source and sink routers + //Calculate the layer position of the source and sink routers source_router_layer_position = router_list[source_router].get_router_layer_position(); + sink_router_layer_position = router_list[sink_router].get_router_layer_position(); + + //Get visibility settings of the current NoC link based on the layer visibility 
settings set by the user + t_draw_layer_display noc_link_visibility = get_element_visibility_and_transparency(source_router_layer_position, sink_router_layer_position); + + if (!noc_link_visibility.visible) { + continue; /* Don't Draw link */ + } + + // calculate the grid positions of the source and sink routers source_router_x_position = router_list[source_router].get_router_grid_position_x(); source_router_y_position = router_list[source_router].get_router_grid_position_y(); - sink_router_layer_position = router_list[sink_router].get_router_layer_position(); sink_router_x_position = router_list[sink_router].get_router_grid_position_x(); sink_router_y_position = router_list[sink_router].get_router_grid_position_y(); @@ -284,7 +302,7 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic shift_noc_link(link_coords, list_of_noc_link_shift_directions[link_id], link_type, noc_connection_marker_quarter_width, noc_connection_marker_quarter_height); // set the color to draw the current link - g->set_color(noc_link_colors[link_id]); + g->set_color(noc_link_colors[link_id], noc_link_visibility.alpha); //draw a line between the center of the two routers this link connects g->draw_line(link_coords.start, link_coords.end); diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index 116942d9612..ad58bd40604 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -81,6 +81,8 @@ void draw_rr(ezgl::renderer* g) { g->set_line_dash(ezgl::line_dash::none); for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { + int layer_num = rr_graph.node_layer(inode); + int transparency_factor = get_rr_node_transparency(inode); if (!draw_state->draw_rr_node[inode].node_highlighted) { /* If not highlighted node, assign color based on type. 
*/ switch (rr_graph.node_type(inode)) { @@ -105,6 +107,11 @@ void draw_rr(ezgl::renderer* g) { } } + draw_state->draw_rr_node[inode].color.alpha = transparency_factor; + + if (!draw_state->draw_layer_display[layer_num].visible) + continue; // skip drawing if layer is not visible + /* Now call drawing routines to draw the node. */ switch (rr_graph.node_type(inode)) { case SINK: @@ -148,6 +155,8 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + int transparency_factor = get_rr_node_transparency(inode); + t_rr_type type = rr_graph.node_type(inode); VTR_ASSERT(type == CHANX || type == CHANY); @@ -162,7 +171,7 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { std::swap(start, end); } - g->set_color(color); + g->set_color(color, transparency_factor); if (color != DEFAULT_RR_NODE_COLOR) { // If wire is highlighted, then draw with thicker linewidth. 
g->set_line_width(3); @@ -234,7 +243,7 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { if (switchpoint_min == 0) { if (dir != Direction::BIDIR) { //Draw a mux at the start of each wire, labelled with it's size (#inputs) - draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), g); + draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), transparency_factor, g); } } else { //Draw arrows and label with switch point @@ -242,10 +251,10 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { std::swap(arrow_color, text_color); } - g->set_color(arrow_color); + g->set_color(arrow_color, transparency_factor); draw_triangle_along_line(g, arrow_loc_min, start, end); - g->set_color(text_color); + g->set_color(text_color, transparency_factor); ezgl::rectangle bbox(ezgl::point2d(arrow_loc_min.x - DEFAULT_ARROW_SIZE / 2, arrow_loc_min.y - DEFAULT_ARROW_SIZE / 4), ezgl::point2d(arrow_loc_min.x + DEFAULT_ARROW_SIZE / 2, arrow_loc_min.y + DEFAULT_ARROW_SIZE / 4)); ezgl::point2d center = bbox.center(); @@ -260,7 +269,7 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { if (switchpoint_max == 0) { if (dir != Direction::BIDIR) { //Draw a mux at the start of each wire, labelled with it's size (#inputs) - draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), g); + draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), transparency_factor, g); } } else { //Draw arrows and label with switch point @@ -268,10 +277,10 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { std::swap(arrow_color, text_color); } - g->set_color(arrow_color); + g->set_color(arrow_color, transparency_factor); draw_triangle_along_line(g, arrow_loc_max, start, end); - g->set_color(text_color); + g->set_color(text_color, transparency_factor); ezgl::rectangle 
bbox(ezgl::point2d(arrow_loc_max.x - DEFAULT_ARROW_SIZE / 2, arrow_loc_max.y - DEFAULT_ARROW_SIZE / 4), ezgl::point2d(arrow_loc_max.x + DEFAULT_ARROW_SIZE / 2, arrow_loc_max.y + DEFAULT_ARROW_SIZE / 4)); ezgl::point2d center = bbox.center(); @@ -283,7 +292,7 @@ void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { } } } - g->set_color(color); //Ensure color is still set correctly if we drew any arrows/text + g->set_color(color, transparency_factor); //Ensure color is still set correctly if we drew any arrows/text } /* Draws all the edges that the user wants shown between inode and what it @@ -292,7 +301,9 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto rr_node = RRNodeId(inode); + auto rr_node = inode; + + int transparency_factor = get_rr_node_transparency(rr_node); t_rr_type from_type, to_type; short switch_type; @@ -310,33 +321,36 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { to_type = rr_graph.node_type(to_node); bool edge_configurable = rr_graph.edge_is_configurable(inode, iedge); + if (!is_edge_valid_to_draw(RRNodeId(to_node), rr_node)) + continue; // skip drawing if edge is not valid to draw + switch (from_type) { case OPIN: switch (to_type) { case CHANX: case CHANY: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { // If OPIN was clicked on, set color to fan-out ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { // If CHANX or CHANY got clicked, set color to fan-in ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + 
g->set_color(color, transparency_factor); } else { - g->set_color(ezgl::PINK); + g->set_color(ezgl::PINK, transparency_factor); } draw_pin_to_chan_edge(inode, to_node, g); break; case IPIN: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else { - g->set_color(ezgl::MEDIUM_PURPLE); + g->set_color(ezgl::MEDIUM_PURPLE, transparency_factor); } draw_pin_to_pin(inode, to_node, g); break; @@ -363,30 +377,30 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { break; } - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else { - g->set_color(blk_LIGHTSKYBLUE); + g->set_color(blk_LIGHTSKYBLUE, transparency_factor); } draw_pin_to_chan_edge(to_node, inode, g); break; case CHANX: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + 
g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else if (!edge_configurable) { ezgl::color color = blk_DARKGREY; - g->set_color(color); + g->set_color(color, transparency_factor); } else { - g->set_color(blk_DARKGREEN); + g->set_color(blk_DARKGREEN, transparency_factor); } switch_type = rr_graph.edge_switch(rr_node, iedge); draw_chanx_to_chanx_edge(rr_node, RRNodeId(to_node), @@ -394,16 +408,16 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { break; case CHANY: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else if (!edge_configurable) { - g->set_color(blk_DARKGREY); + g->set_color(blk_DARKGREY, transparency_factor); } else { - g->set_color(blk_DARKGREEN); + g->set_color(blk_DARKGREEN, transparency_factor); } switch_type = rr_graph.edge_switch(rr_node, iedge); draw_chanx_to_chany_edge(inode, to_node, @@ -433,30 +447,30 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { break; } - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if 
(rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else { - g->set_color(blk_LIGHTSKYBLUE); + g->set_color(blk_LIGHTSKYBLUE, transparency_factor); } draw_pin_to_chan_edge(to_node, inode, g); break; case CHANX: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else if (!edge_configurable) { ezgl::color color = blk_DARKGREY; - g->set_color(color); + g->set_color(color, transparency_factor); } else { - g->set_color(blk_DARKGREEN); + g->set_color(blk_DARKGREEN, transparency_factor); } switch_type = rr_graph.edge_switch(rr_node, iedge); draw_chanx_to_chany_edge(to_node, inode, @@ -464,17 +478,17 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { break; case CHANY: - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + if (rgb_is_same(draw_state->draw_rr_node[inode].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[to_node].color; - g->set_color(color); - } else if (draw_state->draw_rr_node[to_node].color == ezgl::MAGENTA) { + g->set_color(color, transparency_factor); + } else if (rgb_is_same(draw_state->draw_rr_node[to_node].color, ezgl::MAGENTA)) { ezgl::color color = draw_state->draw_rr_node[inode].color; - g->set_color(color); + g->set_color(color, transparency_factor); } else if (!edge_configurable) { ezgl::color color = blk_DARKGREY; - g->set_color(color); + g->set_color(color, 
transparency_factor); } else { - g->set_color(blk_DARKGREEN); + g->set_color(blk_DARKGREEN, transparency_factor); } switch_type = rr_graph.edge_switch(rr_node, iedge); draw_chany_to_chany_edge(rr_node, RRNodeId(to_node), @@ -491,7 +505,7 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { case IPIN: // from_type switch (to_type) { case SINK: - g->set_color(ezgl::DARK_SLATE_BLUE); + g->set_color(ezgl::DARK_SLATE_BLUE, transparency_factor); draw_pin_to_sink(inode, to_node, g); break; @@ -505,7 +519,7 @@ void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { case SOURCE: // from_type switch (to_type) { case OPIN: - g->set_color(ezgl::PLUM); + g->set_color(ezgl::PLUM, transparency_factor); draw_source_to_pin(inode, to_node, g); break; @@ -538,7 +552,9 @@ void draw_rr_pin(RRNodeId inode, const ezgl::color& color, ezgl::renderer* g) { int ipin = rr_graph.node_pin_num(RRNodeId(inode)); - g->set_color(color); + int transparency_factor = get_rr_node_transparency(inode); + + g->set_color(color, transparency_factor); /* TODO: This is where we can hide fringe physical pins and also identify globals (hide, color, show) */ /* As nodes may appear on more than one side, walk through the possible nodes @@ -553,10 +569,10 @@ void draw_rr_pin(RRNodeId inode, const ezgl::color& color, ezgl::renderer* g) { {xcen - draw_coords->pin_size, ycen - draw_coords->pin_size}, {xcen + draw_coords->pin_size, ycen + draw_coords->pin_size}); sprintf(str, "%d", ipin); - g->set_color(ezgl::BLACK); + g->set_color(ezgl::BLACK, transparency_factor); g->draw_text({xcen, ycen}, str, 2 * draw_coords->pin_size, 2 * draw_coords->pin_size); - g->set_color(color); + g->set_color(color, transparency_factor); } } @@ -566,10 +582,12 @@ void draw_rr_src_sink(RRNodeId inode, ezgl::color color, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + int transparency_factor = get_rr_node_transparency(inode); + float xcen, ycen; 
draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(inode)], &xcen, &ycen); - g->set_color(color); + g->set_color(color, transparency_factor); g->fill_rectangle( {xcen - draw_coords->pin_size, ycen - draw_coords->pin_size}, @@ -577,10 +595,10 @@ void draw_rr_src_sink(RRNodeId inode, ezgl::color color, ezgl::renderer* g) { std::string str = vtr::string_fmt("%d", rr_graph.node_class_num(inode)); - g->set_color(ezgl::BLACK); + g->set_color(ezgl::BLACK, transparency_factor); g->draw_text({xcen, ycen}, str.c_str(), 2 * draw_coords->pin_size, 2 * draw_coords->pin_size); - g->set_color(color); + g->set_color(color, transparency_factor); } void draw_get_rr_src_sink_coords(const t_rr_node& node, float* xcen, float* ycen) { @@ -676,20 +694,17 @@ RRNodeId draw_check_rr_node_hit(float click_x, float click_y) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - //TODO: Change when graphics supports 3D FPGAs - VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); - int layer_num = 0; - - for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { + for (const RRNodeId& inode : device_ctx.rr_graph.nodes()) { switch (rr_graph.node_type(inode)) { case IPIN: case OPIN: { - t_physical_tile_loc tile_loc = {rr_graph.node_xlow(inode), rr_graph.node_ylow(inode), layer_num}; - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(tile_loc); - int width_offset = device_ctx.grid.get_width_offset(tile_loc); - int height_offset = device_ctx.grid.get_height_offset(tile_loc); + int i = rr_graph.node_xlow(inode); + int j = rr_graph.node_ylow(inode); + int layer_num = rr_graph.node_layer(inode); + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_pin_num(inode); - float xcen, ycen; for (const e_side& iside : SIDES) { // If pin exists on this 
side of the block, then get pin coordinates @@ -800,7 +815,15 @@ void draw_rr_costs(ezgl::renderer* g, const vtr::vector& rr_cos float cost = rr_costs[inode]; if (std::isnan(cost)) continue; + int transparency_factor = get_rr_node_transparency(inode); + + // continue if rr_node layer is not visible + int layer_num = rr_graph.node_layer(inode); + if (!draw_state->draw_layer_display[layer_num].visible) + continue; + ezgl::color color = to_ezgl_color(cmap->color(cost)); + color.alpha = transparency_factor; switch (rr_graph.node_type(inode)) { case CHANX: //fallthrough @@ -899,4 +922,13 @@ void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, con *ycen = yc; } +int get_rr_node_transparency(RRNodeId rr_node) { + t_draw_state* draw_state = get_draw_state_vars(); + auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + int layer_num = rr_graph.node_layer(rr_node); + + return draw_state->draw_layer_display[layer_num].alpha; +} #endif diff --git a/vpr/src/draw/draw_rr.h b/vpr/src/draw/draw_rr.h index 63e12398792..49e0949271b 100644 --- a/vpr/src/draw/draw_rr.h +++ b/vpr/src/draw/draw_rr.h @@ -97,5 +97,11 @@ void draw_get_rr_pin_coords(RRNodeId inode, float* xcen, float* ycen, const e_si * the physical pin is on. 
*/ void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, const e_side& pin_side); +/** + * @brief returns transparency, given rr node + * Checks the layer transparency of the given rr node and returns it + * + */ +int get_rr_node_transparency(RRNodeId rr_node); #endif /* NO_GRAPHICS */ #endif /* DRAW_RR_H */ diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp index fe95d0d21ed..0f69b4c6087 100644 --- a/vpr/src/draw/draw_toggle_functions.cpp +++ b/vpr/src/draw/draw_toggle_functions.cpp @@ -442,8 +442,99 @@ void set_net_max_fanout_cbk(GtkSpinButton* self, ezgl::application* app) { */ void set_net_alpha_value_cbk(GtkSpinButton* self, ezgl::application* app) { t_draw_state* draw_state = get_draw_state_vars(); - draw_state->net_alpha = gtk_spin_button_get_value_as_int(self); + draw_state->net_alpha = (255 - gtk_spin_button_get_value_as_int(self)) / 255.0; app->refresh_drawing(); } +/** + * @brief Callback function for 3d layer checkboxes + */ +void select_layer_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/) { + t_draw_state* draw_state = get_draw_state_vars(); + + GtkWidget* parent = gtk_widget_get_parent(widget); + GtkBox* box = GTK_BOX(parent); + + GList* children = gtk_container_get_children(GTK_CONTAINER(box)); + int index = 0; + // Iterate over the checkboxes + for (GList* iter = children; iter != NULL; iter = g_list_next(iter)) { + if (GTK_IS_CHECK_BUTTON(iter->data)) { + GtkWidget* checkbox = GTK_WIDGET(iter->data); + gboolean state = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(checkbox)); + const gchar* name = gtk_button_get_label(GTK_BUTTON(checkbox)); + + // Only iterate through checkboxes with name "Layer ...", skip Cross Layer Connection + if (std::string(name).find("Layer") != std::string::npos + && std::string(name).find("Cross") == std::string::npos) { + // Change the boolean of the draw_layer_display vector depending on checkbox + if (state) { +
draw_state->draw_layer_display[index].visible = true; + } else { + draw_state->draw_layer_display[index].visible = false; + } + index++; + } + } + } + application.refresh_drawing(); + g_list_free(children); +} +/** + * @brief Callback function for 3d layer transparency spin buttons + */ +void transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/) { + t_draw_state* draw_state = get_draw_state_vars(); + + GtkWidget* parent = gtk_widget_get_parent(widget); + GtkBox* box = GTK_BOX(parent); + GList* children = gtk_container_get_children(GTK_CONTAINER(box)); + + int index = 0; + // Iterate over transparency layers + for (GList* iter = children; iter != NULL; iter = g_list_next(iter)) { + if (GTK_IS_SPIN_BUTTON(iter->data)) { + GtkWidget* spin_button = GTK_WIDGET(iter->data); + const gchar* name = gtk_widget_get_name(spin_button); + + if (std::string(name).find("Transparency") != std::string::npos + && std::string(name).find("Cross") == std::string::npos) { + gint value = gtk_spin_button_get_value(GTK_SPIN_BUTTON(spin_button)); + draw_state->draw_layer_display[index].alpha = 255 - value; + index++; + } + } + } + application.refresh_drawing(); + g_list_free(children); +} + +/** + * @brief Callback function for cross layer connection checkbox + */ +void cross_layer_checkbox_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/) { + t_draw_state* draw_state = get_draw_state_vars(); + + gboolean state = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widget)); + + if (state) { + draw_state->cross_layer_display.visible = true; + } else { + draw_state->cross_layer_display.visible = false; + } + + application.refresh_drawing(); +} + +/** + * @brief Callback function for cross layer connection spin button + */ +void cross_layer_transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/) { + t_draw_state* draw_state = get_draw_state_vars(); + + gint value = gtk_spin_button_get_value(GTK_SPIN_BUTTON(widget)); + 
draw_state->cross_layer_display.alpha = 255 - value; + + application.refresh_drawing(); +} #endif diff --git a/vpr/src/draw/draw_toggle_functions.h b/vpr/src/draw/draw_toggle_functions.h index 42b1745f5c5..6c256cc2310 100644 --- a/vpr/src/draw/draw_toggle_functions.h +++ b/vpr/src/draw/draw_toggle_functions.h @@ -127,5 +127,37 @@ void net_max_fanout(GtkWidget* /*widget*/, gint /*response_id*/, gpointer /*data * its corresponding value in the UI. */ void set_net_alpha_value(GtkWidget* /*widget*/, gint /*response_id*/, gpointer /*data*/); +/** + * @brief Callback function for 3d layer checkboxes + * Updates draw_state->draw_layer_display based on which checkboxes are checked + * + * @param widget: pointer to the gtk widget for 3d layer checkboxes + */ +void select_layer_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/); + +/** + * @brief Callback function for 3d layer transparency spin buttons + * Updates draw_state->draw_layer_display based on the values in spin buttons + * + * @param widget: gtk widget for layer transparency spin buttons + */ +void transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/); + +/** + * @brief Callback function for cross layer connection checkbox + * Updates draw_state->cross_layer_display.visible based on whether the cross layer + * connection checkbox is checked. 
+ * + * @param widget: gtk widget for the cross layer connection checkbox + */ +void cross_layer_checkbox_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/); + +/** + * @brief Callback function for cross layer connection spin button + * Updates draw_state->cross_layer_display.alpha based on the spin button value + * + * @param widget: gtk widget for the cross layer connection transparency spin button + */ +void cross_layer_transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/); #endif /* NO_GRAPHICS */ #endif /* DRAW_TOGGLE_FUNCTIONS_H */ diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index c014740e374..4750dc37dfe 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -143,6 +143,20 @@ typedef struct { bool node_highlighted; } t_draw_rr_node; +/** + * @brief Structure used to store visibility and transparency state information for a specific layer (die) in the FPGA. + * This structure is also used to store the state information of the cross-layer connections option in the UI. + */ +struct t_draw_layer_display { + ///@brief Whether the current layer should be visible. + bool visible = false; + + ///@brief Transparency value ( 0 - transparent, 255 - Opaque) + ///@note The UI has the opposite definition to make it more intuitive for the user, + /// where increasing the value increases transparency. (255 - transparent, 0 - Opaque) + int alpha = 255; +}; + /** * @brief Structure used to store variables related to highlighting/drawing * @@ -272,6 +286,12 @@ struct t_draw_state { std::vector list_of_breakpoints; + ///@brief Stores visibility and transparency drawing controls for each layer [0 ...
grid.num_layers -1] + std::vector draw_layer_display; + + ///@brief Visibility and transparency for elements that cross die layers + t_draw_layer_display cross_layer_display; + ///@brief base of save graphics file name (i.e before extension) std::string save_graphics_file_base = "vpr"; diff --git a/vpr/src/draw/intra_logic_block.cpp b/vpr/src/draw/intra_logic_block.cpp index 285ade3c027..b333c687573 100644 --- a/vpr/src/draw/intra_logic_block.cpp +++ b/vpr/src/draw/intra_logic_block.cpp @@ -154,37 +154,38 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - //TODO: Change when graphics supports 3D FPGAs - VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); - int layer_num = 0; - for (int i = 0; i < (int)device_ctx.grid.width(); i++) { - for (int j = 0; j < (int)device_ctx.grid.height(); j++) { - /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); - int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); - int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); - - if (width_offset > 0 || height_offset > 0) - continue; - - /* Don't draw if tile is empty. This includes corners. */ - if (is_empty_type(type)) - continue; - - int num_sub_tiles = type->capacity; - for (int k = 0; k < num_sub_tiles; ++k) { - /* Don't draw if block is empty. 
*/ - // TODO: Change when graphics supports 3D - if (place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == EMPTY_BLOCK_ID || place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == INVALID_BLOCK_ID) - continue; - - /* Get block ID */ - // TODO: Change when graphics supports 3D - ClusterBlockId bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); - /* Safety check, that physical blocks exists in the CLB */ - if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) - continue; - draw_internal_pb(bnum, cluster_ctx.clb_nlist.block_pb(bnum), ezgl::rectangle({0, 0}, 0, 0), cluster_ctx.clb_nlist.block_type(bnum), g); + int total_layer_num = device_ctx.grid.get_num_layers(); + + for (int layer_num = 0; layer_num < total_layer_num; layer_num++) { + if (draw_state->draw_layer_display[layer_num].visible) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + /* Only the first block of a group should control drawing */ + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); + + if (width_offset > 0 || height_offset > 0) + continue; + + /* Don't draw if tile is empty. This includes corners. */ + if (is_empty_type(type)) + continue; + + int num_sub_tiles = type->capacity; + for (int k = 0; k < num_sub_tiles; ++k) { + /* Don't draw if block is empty. 
*/ + if (place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) == EMPTY_BLOCK_ID || place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) == INVALID_BLOCK_ID) + continue; + + /* Get block ID */ + ClusterBlockId bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + /* Safety check, that physical blocks exists in the CLB */ + if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) + continue; + draw_internal_pb(bnum, cluster_ctx.clb_nlist.block_pb(bnum), ezgl::rectangle({0, 0}, 0, 0), cluster_ctx.clb_nlist.block_type(bnum), g); + } + } } } } @@ -340,12 +341,18 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezgl::rectangle& parent_bbox, const t_logical_block_type_ptr type, ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); t_draw_state* draw_state = get_draw_state_vars(); + + auto& place_ctx = g_vpr_ctx.placement(); + t_selected_sub_block_info& sel_sub_info = get_selected_sub_block_info(); t_pb_type* pb_type = pb->pb_graph_node->pb_type; ezgl::rectangle temp = draw_coords->get_pb_bbox(clb_index, *pb->pb_graph_node); ezgl::rectangle abs_bbox = temp + parent_bbox.bottom_left(); + int layer_num = place_ctx.block_locs[clb_index].loc.layer; + int transparency_factor = draw_state->draw_layer_display[layer_num].alpha; + // if we've gone too far, don't draw anything if (pb_type->depth > draw_state->show_blk_internal) { return; @@ -359,13 +366,13 @@ static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezg // determine default background color if (sel_sub_info.is_selected(pb->pb_graph_node, clb_index)) { - g->set_color(SELECTED_COLOR); + g->set_color(SELECTED_COLOR, transparency_factor); } else if (sel_sub_info.is_sink_of_selected(pb->pb_graph_node, clb_index)) { - g->set_color(DRIVES_IT_COLOR); + g->set_color(DRIVES_IT_COLOR, transparency_factor); } else if 
(sel_sub_info.is_source_of_selected(pb->pb_graph_node, clb_index)) { - g->set_color(DRIVEN_BY_IT_COLOR); + g->set_color(DRIVEN_BY_IT_COLOR, transparency_factor); } else { - g->set_color(draw_state->block_color(clb_index)); + g->set_color(draw_state->block_color(clb_index), transparency_factor); } } else { // If block is not used, draw as empty block (ie. white @@ -375,7 +382,7 @@ static void draw_internal_pb(const ClusterBlockId clb_index, t_pb* pb, const ezg g->set_color(ezgl::WHITE); } g->fill_rectangle(abs_bbox); - g->set_color(ezgl::BLACK); + g->set_color(ezgl::BLACK, transparency_factor); if (draw_state->draw_block_outlines) { g->draw_rectangle(abs_bbox); @@ -553,11 +560,13 @@ void draw_logical_connections(ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); auto& atom_ctx = g_vpr_ctx.atom(); + auto& place_ctx = g_vpr_ctx.placement(); g->set_line_dash(ezgl::line_dash::none); //constexpr float NET_ALPHA = 0.0275; float NET_ALPHA = draw_state->net_alpha; + int transparency_factor; // iterate over all the atom nets for (auto net_id : atom_ctx.nlist.nets()) { @@ -567,8 +576,15 @@ void draw_logical_connections(ezgl::renderer* g) { AtomPinId driver_pin_id = atom_ctx.nlist.net_driver(net_id); AtomBlockId src_blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - const t_pb_graph_node* src_pb_gnode = atom_ctx.lookup.atom_pb_graph_node(src_blk_id); ClusterBlockId src_clb = atom_ctx.lookup.atom_clb(src_blk_id); + + int src_layer_num = place_ctx.block_locs[src_clb].loc.layer; + //To only show primitive nets that are connected to currently active layers on the screen + if (!draw_state->draw_layer_display[src_layer_num].visible) { + continue; /* Don't Draw */ + } + + const t_pb_graph_node* src_pb_gnode = atom_ctx.lookup.atom_pb_graph_node(src_blk_id); bool src_is_selected = sel_subblk_info.is_in_selected_subtree(src_pb_gnode, src_clb); bool src_is_src_of_selected = sel_subblk_info.is_source_of_selected(src_pb_gnode, src_clb); @@ -577,15 +593,26 @@ void 
draw_logical_connections(ezgl::renderer* g) { AtomBlockId sink_blk_id = atom_ctx.nlist.pin_block(sink_pin_id); const t_pb_graph_node* sink_pb_gnode = atom_ctx.lookup.atom_pb_graph_node(sink_blk_id); ClusterBlockId sink_clb = atom_ctx.lookup.atom_clb(sink_blk_id); + int sink_layer_num = place_ctx.block_locs[sink_clb].loc.layer; + + t_draw_layer_display element_visibility = get_element_visibility_and_transparency(src_layer_num, sink_layer_num); + + if (!element_visibility.visible) { + continue; /* Don't Draw */ + } + + transparency_factor = element_visibility.alpha; + //color selection + //transparency factor is the most transparent of the 2 options that the user selects from the UI if (src_is_selected && sel_subblk_info.is_sink_of_selected(sink_pb_gnode, sink_clb)) { - g->set_color(DRIVES_IT_COLOR, DRIVES_IT_COLOR.alpha * NET_ALPHA); + g->set_color(DRIVES_IT_COLOR, fmin(transparency_factor, DRIVES_IT_COLOR.alpha * NET_ALPHA)); } else if (src_is_src_of_selected && sel_subblk_info.is_in_selected_subtree(sink_pb_gnode, sink_clb)) { - g->set_color(DRIVEN_BY_IT_COLOR, DRIVEN_BY_IT_COLOR.alpha * NET_ALPHA); + g->set_color(DRIVEN_BY_IT_COLOR, fmin(transparency_factor, DRIVEN_BY_IT_COLOR.alpha * NET_ALPHA)); } else if (draw_state->show_nets == DRAW_PRIMITIVE_NETS && (draw_state->showing_sub_blocks() || src_clb != sink_clb)) { - g->set_color(ezgl::BLACK, ezgl::BLACK.alpha * NET_ALPHA); // if showing all, draw the other ones in black + g->set_color(ezgl::BLACK, fmin(transparency_factor, ezgl::BLACK.alpha * NET_ALPHA)); // if showing all, draw the other ones in black } else { - continue; // not showing all, and not the sperified block, so skip + continue; // not showing all, and not the specified block, so skip } draw_one_logical_connection(driver_pin_id, sink_pin_id, g); diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index f2005c2bc6c..77551dbe505 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -16,7 +16,7 @@ 
#ifndef NO_GRAPHICS -void draw_manual_moves_window(std::string block_id) { +void draw_manual_moves_window(const std::string& block_id) { t_draw_state* draw_state = get_draw_state_vars(); if (!draw_state->manual_moves_state.manual_move_window_is_open) { @@ -249,7 +249,7 @@ void manual_move_cost_summary_dialog() { //If the user accepts the manual move case GTK_RESPONSE_ACCEPT: draw_state->manual_moves_state.manual_move_info.user_move_outcome = ACCEPTED; - application.update_message(msg.c_str()); + application.update_message(msg); break; //If the user rejects the manual move case GTK_RESPONSE_REJECT: @@ -282,14 +282,8 @@ void close_manual_moves_window() { draw_state->manual_moves_state.manual_move_window_is_open = false; } -bool string_is_a_number(std::string block_id) { - for (size_t i = 0; i < block_id.size(); i++) { - //Returns 0 if the string does not have characters from 0-9 - if (isdigit(block_id[i]) == 0) { - return false; - } - } - return true; +bool string_is_a_number(const std::string& block_id) { + return std::all_of(block_id.begin(), block_id.end(), [](unsigned char c) { return isdigit(c) != 0; }); /* unsigned char: isdigit() is undefined for negative char values */ } //Updates ManualMovesInfo cost and placer move outcome variables. User_move_outcome is also updated.
@@ -311,8 +305,8 @@ e_create_move manual_move_display_and_propose(ManualMoveGenerator& manual_move_g draw_manual_moves_window(""); update_screen(ScreenUpdatePriority::MAJOR, " ", PLACEMENT, nullptr); move_type = e_move_type::MANUAL_MOVE; - t_logical_block_type blk_type; //no need to specify block type in manual move "propose_move" function - return manual_move_generator.propose_move(blocks_affected, move_type, blk_type, rlim, placer_opts, criticalities); + t_propose_action proposed_action{move_type, -1}; //no need to specify block type in manual move "propose_move" function + return manual_move_generator.propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); } #endif /*NO_GRAPHICS*/ diff --git a/vpr/src/draw/manual_moves.h b/vpr/src/draw/manual_moves.h index 8e31446bae0..94d981a1e35 100644 --- a/vpr/src/draw/manual_moves.h +++ b/vpr/src/draw/manual_moves.h @@ -93,7 +93,7 @@ bool manual_move_is_selected(); * Window prompts the user for input: block id/name used as the from block in the move generator, x position, y position, and subtile position. * @param block_id: The block id is passed in if the user decides to highlight the block in the UI. If the user decides to manually input the block ID in the manual move window, the string will be empty and the block ID will later be assigned to ManualMovesState struct. */ -void draw_manual_moves_window(std::string block_id); +void draw_manual_moves_window(const std::string& block_id); /** * @brief Evaluates if the user input is valid and allowed. @@ -139,7 +139,7 @@ void close_manual_moves_window(); * * @return True if the string only contains numbers, false otherwise. */ -bool string_is_a_number(std::string block_id); +bool string_is_a_number(const std::string& block_id); /** * @brief Updates the ManualMovesState variable members. 
diff --git a/vpr/src/draw/ui_setup.cpp b/vpr/src/draw/ui_setup.cpp index 5aacd684aaa..61e05c9be3e 100644 --- a/vpr/src/draw/ui_setup.cpp +++ b/vpr/src/draw/ui_setup.cpp @@ -22,7 +22,6 @@ # include "ezgl/point.hpp" # include "ezgl/application.hpp" # include "ezgl/graphics.hpp" - void basic_button_setup(ezgl::application* app) { //button to enter window_mode, created in main.ui GtkButton* window = (GtkButton*)app->get_object("Window"); @@ -104,7 +103,7 @@ void block_button_setup(ezgl::application* app) { } } -/* +/** * @brief configures and connects signals/functions for routing buttons * * Connects signals/sets default values for toggleRRButton, ToggleCongestion, @@ -143,6 +142,67 @@ void routing_button_setup(ezgl::application* app) { show_widget("RoutingMenuButton", app); } +void view_button_setup(ezgl::application* app) { + int num_layers; + + auto& device_ctx = g_vpr_ctx.device(); + num_layers = device_ctx.grid.get_num_layers(); + + // Hide the button if we only have one layer + if (num_layers == 1) { + hide_widget("3DMenuButton", app); + } else { + GtkBox* box = GTK_BOX(app->get_object("LayerBox")); + GtkBox* trans_box = GTK_BOX(app->get_object("TransparencyBox")); + + // Create checkboxes and spin buttons for each layer + for (int i = 0; i < num_layers; i++) { + std::string label = "Layer " + std::to_string(i); + std::string trans_label = "Transparency " + std::to_string(i); + + GtkWidget* checkbox = gtk_check_button_new_with_label(label.c_str()); + // Add margins to checkboxes to match the transparency spin button height + gtk_widget_set_margin_top(checkbox, 7); + gtk_widget_set_margin_bottom(checkbox, 7); + + gtk_box_pack_start(GTK_BOX(box), checkbox, FALSE, FALSE, 0); + + GtkWidget* spin_button = gtk_spin_button_new_with_range(0, 255, 1); + gtk_widget_set_name(spin_button, trans_label.c_str()); /* GTK copies the name; a g_strdup here would leak */ + gtk_box_pack_start(GTK_BOX(trans_box), spin_button, FALSE, FALSE, 0); + + if (i == 0) { + // Set the initial state of the first checkbox to checked to
represent the default view. + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(checkbox), TRUE); + } + + g_signal_connect(checkbox, "toggled", G_CALLBACK(select_layer_cbk), app); + g_signal_connect(spin_button, "value-changed", G_CALLBACK(transparency_cbk), app); + } + + // Set up the final row for cross-layer connections + std::string label = "Cross Layer Connections"; + std::string trans_label = "CrossLayerConnectionsTransparency"; + + GtkWidget* checkbox = gtk_check_button_new_with_label(label.c_str()); + gtk_widget_set_margin_top(checkbox, 7); + gtk_widget_set_margin_bottom(checkbox, 7); + gtk_box_pack_start(GTK_BOX(box), checkbox, FALSE, FALSE, 0); + + GtkWidget* spin_button = gtk_spin_button_new_with_range(0, 255, 1); + gtk_widget_set_name(spin_button, trans_label.c_str()); /* GTK copies the name; a g_strdup here would leak */ + gtk_box_pack_start(GTK_BOX(trans_box), spin_button, FALSE, FALSE, 0); + + // Connect cross layer to callback function: + g_signal_connect(checkbox, "toggled", G_CALLBACK(cross_layer_checkbox_cbk), app); + g_signal_connect(spin_button, "value-changed", G_CALLBACK(cross_layer_transparency_cbk), app); + + // Make all widgets in the boxes appear + gtk_widget_show_all(GTK_WIDGET(box)); + gtk_widget_show_all(GTK_WIDGET(trans_box)); + } +} + /* * @brief Loads required data for search autocomplete, sets up special completion fn */ diff --git a/vpr/src/draw/ui_setup.h b/vpr/src/draw/ui_setup.h index 567b3ec8659..40f2045ed91 100644 --- a/vpr/src/draw/ui_setup.h +++ b/vpr/src/draw/ui_setup.h @@ -61,6 +61,13 @@ void search_setup(ezgl::application* app); */ void routing_button_setup(ezgl::application* app); +/** + * @brief configures and connects signals/functions for View buttons + * + * Determines how many layers there are and displays depending on number of layers + */ +void view_button_setup(ezgl::application* app); + /** * @brief connects critical path button to its cbk fn. Called in all setup options that show crit.
path */ diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp index 51dc0959708..c0ee94cc7ce 100644 --- a/vpr/src/place/RL_agent_util.cpp +++ b/vpr/src/place/RL_agent_util.cpp @@ -2,9 +2,6 @@ #include "manual_move_generator.h" void create_move_generators(std::unique_ptr& move_generator, std::unique_ptr& move_generator2, const t_placer_opts& placer_opts, int move_lim) { - //extract available physical block types in the netlist - determine_agent_block_types(); - if (placer_opts.RL_agent_placement == false) { if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG("Using static probabilities for choosing each move type\n"); @@ -45,7 +42,6 @@ void create_move_generators(std::unique_ptr& move_generator, std: * only move type. * * This state is activated late in the anneal and in the Quench */ - auto& place_ctx = g_vpr_ctx.placement(); int num_1st_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_1ST_STATE_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES; int num_2nd_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? 
NUM_PL_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES; @@ -55,17 +51,20 @@ void create_move_generators(std::unique_ptr& move_generator, std: if (placer_opts.place_agent_space == e_agent_space::MOVE_BLOCK_TYPE) { VTR_LOG("Using simple RL 'Epsilon Greedy agent' for choosing move and block types\n"); karmed_bandit_agent1 = std::make_unique(num_1st_state_avail_moves, - place_ctx.agent_blk_type_to_phys_blk_type_map.size(), + e_agent_space::MOVE_BLOCK_TYPE, placer_opts.place_agent_epsilon); } else { VTR_LOG("Using simple RL 'Epsilon Greedy agent' for choosing move types\n"); karmed_bandit_agent1 = std::make_unique(num_1st_state_avail_moves, + e_agent_space::MOVE_TYPE, placer_opts.place_agent_epsilon); } karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim); move_generator = std::make_unique(karmed_bandit_agent1); //agent's 2nd state - karmed_bandit_agent2 = std::make_unique(num_2nd_state_avail_moves, placer_opts.place_agent_epsilon); + karmed_bandit_agent2 = std::make_unique(num_2nd_state_avail_moves, + e_agent_space::MOVE_TYPE, + placer_opts.place_agent_epsilon); karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim); move_generator2 = std::make_unique(karmed_bandit_agent2); } else { @@ -74,15 +73,17 @@ void create_move_generators(std::unique_ptr& move_generator, std: if (placer_opts.place_agent_space == e_agent_space::MOVE_BLOCK_TYPE) { VTR_LOG("Using simple RL 'Softmax agent' for choosing move and block types\n"); karmed_bandit_agent1 = std::make_unique(num_1st_state_avail_moves, - place_ctx.agent_blk_type_to_phys_blk_type_map.size()); + e_agent_space::MOVE_BLOCK_TYPE); } else { VTR_LOG("Using simple RL 'Softmax agent' for choosing move types\n"); - karmed_bandit_agent1 = std::make_unique(num_1st_state_avail_moves); + karmed_bandit_agent1 = std::make_unique(num_1st_state_avail_moves, + e_agent_space::MOVE_TYPE); } karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim); move_generator = 
std::make_unique(karmed_bandit_agent1); //agent's 2nd state - karmed_bandit_agent2 = std::make_unique(num_2nd_state_avail_moves); + karmed_bandit_agent2 = std::make_unique(num_2nd_state_avail_moves, + e_agent_space::MOVE_TYPE); karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim); move_generator2 = std::make_unique(karmed_bandit_agent2); } @@ -115,22 +116,4 @@ void update_move_generator(std::unique_ptr& move_generator, std:: else move_generator2 = std::move(current_move_generator); } -} - -void determine_agent_block_types() { - //Loop through all available logical block types and store the ones that exist in the netlist - auto& device_ctx = g_vpr_ctx.device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_ctx = g_vpr_ctx.mutable_placement(); - int agent_type_index = 0; - for (auto itype : device_ctx.logical_block_types) { - if (itype.index == 0) //ignore empty type - continue; - auto blk_per_type = cluster_ctx.clb_nlist.blocks_per_type(itype); - if (blk_per_type.size() != 0) { - place_ctx.phys_blk_type_to_agent_blk_type_map.insert(std::pair(agent_type_index, itype.index)); - place_ctx.agent_blk_type_to_phys_blk_type_map.insert(std::pair(itype.index, agent_type_index)); - agent_type_index++; - } - } } \ No newline at end of file diff --git a/vpr/src/place/RL_agent_util.h b/vpr/src/place/RL_agent_util.h index b7e855b7f82..ebfee697850 100644 --- a/vpr/src/place/RL_agent_util.h +++ b/vpr/src/place/RL_agent_util.h @@ -30,10 +30,4 @@ void assign_current_move_generator(std::unique_ptr& move_generato * @ brief move the updated current_move_generator to its original move_Generator structure based on he placer_options and the agent state */ void update_move_generator(std::unique_ptr& move_generator, std::unique_ptr& move_generator2, e_agent_state agent_state, const t_placer_opts& placer_opts, bool in_quench, std::unique_ptr& current_move_generator); - -/** - * @ brief determine which block types used by the netlist and create a map between 
physical block types and agent block types (the ones that are used in the netlist) - */ -void determine_agent_block_types(); - #endif diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp index 22e2a4ed6a9..12291b0fdc7 100644 --- a/vpr/src/place/centroid_move_generator.cpp +++ b/vpr/src/place/centroid_move_generator.cpp @@ -5,9 +5,9 @@ #include "place_constraints.h" #include "move_utils.h" -e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { +e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr); if (!b_from) { //No movable block found return e_create_move::ABORT; @@ -23,15 +23,14 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); - t_range_limiters range_limiters; - range_limiters.original_rlim = rlim; - range_limiters.dm_rlim = placer_opts.place_dm_rlim; - range_limiters.first_rlim = place_move_ctx.first_rlim; + t_range_limiters range_limiters{rlim, + place_move_ctx.first_rlim, + placer_opts.place_dm_rlim}; t_pl_loc to, centroid; /* Calculate the centroid location*/ - calculate_centroid_loc(b_from, false, centroid, NULL); + calculate_centroid_loc(b_from, false, centroid, nullptr); /* Find a location near the weighted centroid_loc */ if 
(!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) { diff --git a/vpr/src/place/centroid_move_generator.h b/vpr/src/place/centroid_move_generator.h index 24a7277ae1d..cbcbb883aea 100644 --- a/vpr/src/place/centroid_move_generator.h +++ b/vpr/src/place/centroid_move_generator.h @@ -13,7 +13,7 @@ * Returns its choices by filling in affected_blocks. */ class CentroidMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp index 32d531138a5..f85ce644c2e 100644 --- a/vpr/src/place/critical_uniform_move_generator.cpp +++ b/vpr/src/place/critical_uniform_move_generator.cpp @@ -3,11 +3,11 @@ #include "place_constraints.h" #include "move_utils.h" -e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { +e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { ClusterNetId net_from; int pin_from; //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, true, &net_from, &pin_from); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, true, &net_from, &pin_from); auto& 
place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/place/critical_uniform_move_generator.h b/vpr/src/place/critical_uniform_move_generator.h index 7190d08a95b..a5a08af7c3b 100644 --- a/vpr/src/place/critical_uniform_move_generator.h +++ b/vpr/src/place/critical_uniform_move_generator.h @@ -15,7 +15,7 @@ * Returns its choices by filling in affected_blocks. */ class CriticalUniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp index 4e1c3f618b1..db49fc88486 100644 --- a/vpr/src/place/directed_moves_util.cpp +++ b/vpr/src/place/directed_moves_util.cpp @@ -101,6 +101,6 @@ static std::map available_reward_function = { {"runtime_aware", RUNTIME_AWARE}, {"WLbiased_runtime_aware", WL_BIASED_RUNTIME_AWARE}}; -e_reward_function string_to_reward(std::string st) { +e_reward_function string_to_reward(const std::string& st) { return available_reward_function[st]; } diff --git a/vpr/src/place/directed_moves_util.h b/vpr/src/place/directed_moves_util.h index d706028dc04..11ef3df71c3 100644 --- a/vpr/src/place/directed_moves_util.h +++ b/vpr/src/place/directed_moves_util.h @@ -14,7 +14,7 @@ enum e_reward_function { WL_BIASED_RUNTIME_AWARE ///@ same as RUNTIME_AWARE but more biased to WL cost (the factor of the bias is REWARD_BB_TIMING_RELATIVE_WEIGHT) }; -e_reward_function string_to_reward(std::string st); +e_reward_function string_to_reward(const std::string& st); ///@brief Helper function that returns the x, 
y coordinates of a pin void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc); diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp index ee69aeda5f0..b9be05d892a 100644 --- a/vpr/src/place/feasible_region_move_generator.cpp +++ b/vpr/src/place/feasible_region_move_generator.cpp @@ -5,11 +5,11 @@ #include "place_constraints.h" #include "move_utils.h" -e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { ClusterNetId net_from; int pin_from; //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, true, &net_from, &pin_from); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, true, &net_from, &pin_from); if (!b_from) { //No movable block found return e_create_move::ABORT; @@ -46,7 +46,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& place_move_ctx.Y_coord.push_back(place_ctx.block_locs[bnum].loc.y); } } - if (place_move_ctx.X_coord.size() != 0) { + if (!place_move_ctx.X_coord.empty()) { max_x = *(std::max_element(place_move_ctx.X_coord.begin(), place_move_ctx.X_coord.end())); min_x = *(std::min_element(place_move_ctx.X_coord.begin(), place_move_ctx.X_coord.end())); max_y = *(std::max_element(place_move_ctx.Y_coord.begin(), place_move_ctx.Y_coord.end())); @@ -60,9 +60,6 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& //Get the most critical output of the node int xt, yt; - xt = 0; - yt = 0; - ClusterBlockId b_output = 
cluster_ctx.clb_nlist.net_pin_block(net_from, pin_from); t_pl_loc output_loc = place_ctx.block_locs[b_output].loc; xt = output_loc.x; @@ -100,10 +97,9 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& } VTR_ASSERT(FR_coords.ymin <= FR_coords.ymax); - t_range_limiters range_limiters; - range_limiters.original_rlim = rlim; - range_limiters.dm_rlim = placer_opts.place_dm_rlim; - range_limiters.first_rlim = place_move_ctx.first_rlim; + t_range_limiters range_limiters{rlim, + place_move_ctx.first_rlim, + placer_opts.place_dm_rlim}; // Try to find a legal location inside the feasible region if (!find_to_loc_median(cluster_from_type, from, &FR_coords, to, b_from)) { diff --git a/vpr/src/place/feasible_region_move_generator.h b/vpr/src/place/feasible_region_move_generator.h index 38949d0a03e..0f635c00a57 100644 --- a/vpr/src/place/feasible_region_move_generator.h +++ b/vpr/src/place/feasible_region_move_generator.h @@ -19,7 +19,7 @@ * */ class FeasibleRegionMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/place/manual_move_generator.cpp b/vpr/src/place/manual_move_generator.cpp index 2cc80347df1..6e2cf43d5cc 100644 --- a/vpr/src/place/manual_move_generator.cpp +++ b/vpr/src/place/manual_move_generator.cpp @@ -13,7 +13,7 @@ #endif //NO_GRAPHICS //Manual Move Generator function -e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& /*blk_type*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const 
PlacerCriticalities* /*criticalities*/) { +e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /*proposed_action*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { int block_id = -1; t_pl_loc to; diff --git a/vpr/src/place/manual_move_generator.h b/vpr/src/place/manual_move_generator.h index 4beabd2cce7..2995006e908 100644 --- a/vpr/src/place/manual_move_generator.h +++ b/vpr/src/place/manual_move_generator.h @@ -27,7 +27,7 @@ class ManualMoveGenerator : public MoveGenerator { public: //Evaluates if move is successful and legal or unable to do. - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& /*blk_type*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& /*proposed_action*/, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; }; #endif /*VPR_MANUAL_MOVE_GEN_H */ diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp index 9dae21bca5b..5f7df239ead 100644 --- a/vpr/src/place/median_move_generator.cpp +++ b/vpr/src/place/median_move_generator.cpp @@ -9,9 +9,9 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_coord_new, ClusterBlockId block_id, bool& skip_net); -e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { +e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const 
t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr); if (!b_from) { //No movable block found return e_create_move::ABORT; @@ -95,7 +95,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ place_move_ctx.Y_coord.push_back(coords.ymax); } - if ((place_move_ctx.X_coord.size() == 0) || (place_move_ctx.Y_coord.size() == 0)) + if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) return e_create_move::ABORT; //calculate the median region @@ -109,10 +109,9 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ limit_coords.ymax = place_move_ctx.Y_coord[floor((place_move_ctx.Y_coord.size() - 1) / 2) + 1]; //arrange the different range limiters - t_range_limiters range_limiters; - range_limiters.original_rlim = rlim; - range_limiters.first_rlim = place_move_ctx.first_rlim; - range_limiters.dm_rlim = placer_opts.place_dm_rlim; + t_range_limiters range_limiters{rlim, + place_move_ctx.first_rlim, + placer_opts.place_dm_rlim}; //find a location in a range around the center of median region t_pl_loc median_point; diff --git a/vpr/src/place/median_move_generator.h b/vpr/src/place/median_move_generator.h index 49f5c5b010b..ccecdf86a0e 100644 --- a/vpr/src/place/median_move_generator.h +++ b/vpr/src/place/median_move_generator.h @@ -16,7 +16,7 @@ * around it */ class MedianMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, 
const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 378d86c8bf1..83791bbcf9b 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -42,7 +42,7 @@ struct MoveTypeStat { */ class MoveGenerator { public: - virtual ~MoveGenerator() {} + virtual ~MoveGenerator() = default; /** * @brief Updates affected_blocks with the proposed move, while respecting the current rlim @@ -51,14 +51,15 @@ class MoveGenerator { * to match the parameters needed by all move generators * * @param blocks_affectedt: the output of the move - * @param move_type: the move type used + * @param proposed_action: Contains the move type and block type. If the block type is specified, + * the proposed move swaps instances of the given block type. Otherwise, the selected block type + * by the move generator is written to proposed_action.logical_blk_type_index. + * If proposed_action.logical_blk_type_index is -1, this function will choose the block from the netlist (regardless of type). * @param rlim: maximum distance a block can move in x or y direction, in the compressed grid space * @param placer_opts: all the placer options * @param criticalities: the placer criticalities, useful for timing directed moves - * @param blk_type: function proposes a move with given block type if specified. - * If blk_type index is -1, this function will choose the block randomly from the netlist (regardless of type). 
*/ - virtual e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) = 0; + virtual e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) = 0; /** * @brief Recieves feedback about the outcome of the previously proposed move diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 5e1188db6c3..5a1e9f1cd60 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -18,7 +18,7 @@ bool f_placer_breakpoint_reached = false; //Records counts of reasons for aborted moves static std::map f_move_abort_reasons; -void log_move_abort(std::string reason) { +void log_move_abort(const std::string& reason) { ++f_move_abort_reasons[reason]; } @@ -497,34 +497,11 @@ std::set determine_locations_emptied_by_move(t_pl_blocks_to_be_moved& return empty_locs; } -int convert_agent_to_phys_blk_type(int agent_blk_type_index) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - if (place_ctx.phys_blk_type_to_agent_blk_type_map.count(agent_blk_type_index)) { - return place_ctx.phys_blk_type_to_agent_blk_type_map[agent_blk_type_index]; - } - //invalid block type - return -1; -} - -int convert_phys_to_agent_blk_type(int phys_blk_type_index) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - if (place_ctx.agent_blk_type_to_phys_blk_type_map.count(phys_blk_type_index)) { - return place_ctx.agent_blk_type_to_phys_blk_type_map[phys_blk_type_index]; - } - //invalid block type - return -1; -} - -int get_num_agent_types() { - auto& place_ctx = g_vpr_ctx.placement(); - return place_ctx.phys_blk_type_to_agent_blk_type_map.size(); -} - -ClusterBlockId propose_block_to_move(t_logical_block_type& blk_type, bool highly_crit_block, ClusterNetId* net_from, int* pin_from) 
{ +ClusterBlockId propose_block_to_move(int& logical_blk_type_index, bool highly_crit_block, ClusterNetId* net_from, int* pin_from) { ClusterBlockId b_from = ClusterBlockId::INVALID(); auto& cluster_ctx = g_vpr_ctx.clustering(); - if (blk_type.index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block + if (logical_blk_type_index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block if (highly_crit_block) { b_from = pick_from_highly_critical_block(*net_from, *pin_from); } else { @@ -533,13 +510,13 @@ ClusterBlockId propose_block_to_move(t_logical_block_type& blk_type, bool highly //if a movable block found, set the block type if (b_from) { - blk_type.index = convert_phys_to_agent_blk_type(cluster_ctx.clb_nlist.block_type(b_from)->index); + logical_blk_type_index = cluster_ctx.clb_nlist.block_type(b_from)->index; } } else { //If the block type is specified, choose a random block with blk_type to be swapped with another random block if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, blk_type); + b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index); } else { - b_from = pick_from_block(blk_type); + b_from = pick_from_block(logical_blk_type_index); } } @@ -583,7 +560,7 @@ ClusterBlockId pick_from_block() { //Pick a random block with a specific blk_type to be swapped with another random block. //If none is found return ClusterBlockId::INVALID() -ClusterBlockId pick_from_block(t_logical_block_type blk_type) { +ClusterBlockId pick_from_block(const int logical_blk_type_index) { /* Some blocks may be fixed, and should never be moved from their * * initial positions. If we randomly selected such a block try * * another random block. 
* @@ -593,11 +570,11 @@ ClusterBlockId pick_from_block(t_logical_block_type blk_type) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); t_logical_block_type blk_type_temp; - blk_type_temp.index = convert_agent_to_phys_blk_type(blk_type.index); - auto blocks_per_type = cluster_ctx.clb_nlist.blocks_per_type(blk_type_temp); + blk_type_temp.index = logical_blk_type_index; + const auto& blocks_per_type = cluster_ctx.clb_nlist.blocks_per_type(blk_type_temp); //no blocks with this type is available - if (blocks_per_type.size() == 0) { + if (blocks_per_type.empty()) { return ClusterBlockId::INVALID(); } @@ -635,7 +612,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ pin_from = -1; //check if any critical block is available - if (place_move_ctx.highly_crit_pins.size() == 0) { + if (place_move_ctx.highly_crit_pins.empty()) { return ClusterBlockId::INVALID(); } @@ -657,7 +634,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ //Pick a random highly critical block with a specified block type to be swapped with another random block. 
//If none is found return ClusterBlockId::INVALID() -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, t_logical_block_type blk_type) { +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const int logical_blk_type_index) { auto& place_move_ctx = g_placer_ctx.move(); auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -667,7 +644,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ pin_from = -1; //check if any critical block is available - if (place_move_ctx.highly_crit_pins.size() == 0) { + if (place_move_ctx.highly_crit_pins.empty()) { return ClusterBlockId::INVALID(); } @@ -678,7 +655,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ //Check if picked block type matches with the blk_type specified, and it is not fixed //blk_type from propose move doesn't account for the EMPTY type auto b_from_type = cluster_ctx.clb_nlist.block_type(b_from); - if (convert_phys_to_agent_blk_type(b_from_type->index) == blk_type.index) { + if (b_from_type->index == logical_blk_type_index) { if (place_ctx.block_locs[b_from].is_fixed) { return ClusterBlockId::INVALID(); //Block is fixed, cannot move } @@ -953,7 +930,7 @@ static const std::array move_type_strings = "Manual Move"}; //To convert enum move type to string -std::string move_type_to_string(e_move_type move) { +const std::string& move_type_to_string(e_move_type move) { return move_type_strings[int(move)]; } diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 9cdc908fa29..acb2b0d95b0 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -28,7 +28,8 @@ enum class e_move_type { CRIT_UNIFORM, FEASIBLE_REGION, NUMBER_OF_AUTO_MOVES, - MANUAL_MOVE = NUMBER_OF_AUTO_MOVES + MANUAL_MOVE = NUMBER_OF_AUTO_MOVES, + INVALID_MOVE }; enum class e_create_move { @@ -43,8 +44,8 @@ enum class e_create_move { * random block type 
to be chosen to be swapped. */ struct t_propose_action { - e_move_type move_type; /// determine_locations_emptied_by_move(t_pl_blocks_to_be_moved& /** * @brief Propose block for the RL agent based on required block type. * - * @param blk_type: the agent type of the moving block. + * @param logical_blk_type_index: Index of the block type being perturbed, which is used to select the proper agent data * @param highly_crit_block: block should be chosen from highly critical blocks. * @param net_from: if block is chosen from highly critical blocks, should store the critical net id. * @param pin_from: if block is chosen from highly critical blocks, should save its critical pin id. * * @return block id if any blocks found. ClusterBlockId::INVALID() if no block found. */ -ClusterBlockId propose_block_to_move(t_logical_block_type& blk_type, bool highly_crit_block, ClusterNetId* net_from, int* pin_from); +ClusterBlockId propose_block_to_move(int& logical_blk_type_index, bool highly_crit_block, ClusterNetId* net_from, int* pin_from); /** * @brief Select a random block to be swapped with another block @@ -131,11 +132,11 @@ ClusterBlockId pick_from_block(); /** * @brief Find a block with a specific block type to be swapped with another block * - * @param blk_type: the agent type of the moving block. + * @param logical_blk_type_index: the agent type of the moving block. * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found */ -ClusterBlockId pick_from_block(t_logical_block_type blk_type); +ClusterBlockId pick_from_block(int logical_blk_type_index); /** * @brief Select a random highly critical block to be swapped with another block @@ -147,11 +148,11 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_ /** * @brief Find a block with a specific block type to be swapped with another block * - * @param blk_type: the agent type of the moving block. 
+ * @param logical_blk_type_index: the agent type of the moving block. * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found */ -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, t_logical_block_type blk_type); +ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, int logical_blk_type_index); bool find_to_loc_uniform(t_logical_block_type_ptr type, float rlim, @@ -203,7 +204,7 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, t_pl_loc& to_loc, ClusterBlockId b_from); -std::string move_type_to_string(e_move_type); +const std::string& move_type_to_string(e_move_type); /* find to loaction helper functions */ /** @@ -323,29 +324,4 @@ bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr t std::string e_move_result_to_string(e_move_result move_outcome); -/** - * @brief find the physical block type index associated to the agent block type - * - * Agent block types are defined as physical block types used by the netlist. - * More information on agent block type can be found on the placement context in "vpr_context.h" - * - * @return physical block type index associated with the agent_blk_type_index - */ -int convert_agent_to_phys_blk_type(int agent_blk_type_index); - -/** - * @brief find the agent block type index associated to the physical block type - * - * Agent block types are defined as physical block types used by the netlist. - * More information on agent block type can be found on the placement context in "vpr_context.h" - * - * @return agent block type index associated with the phys_blk_type_index - */ -int convert_phys_to_agent_blk_type(int phys_blk_type_index); - -/** - * @brief return number of available block types in the RLplace agent. 
- */ -int get_num_agent_types(); - #endif diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 1b9a6508010..b9b1a06b84d 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "NetPinTimingInvalidator.h" #include "vtr_assert.h" @@ -761,9 +762,9 @@ void try_place(const Netlist<>& net_list, //allocate move type statistics vectors MoveTypeStat move_type_stat; - move_type_stat.blk_type_moves.resize((get_num_agent_types()) * (placer_opts.place_static_move_prob.size()), 0); - move_type_stat.accepted_moves.resize((get_num_agent_types()) * (placer_opts.place_static_move_prob.size()), 0); - move_type_stat.rejected_moves.resize((get_num_agent_types()) * (placer_opts.place_static_move_prob.size()), 0); + move_type_stat.blk_type_moves.resize(device_ctx.logical_block_types.size() * placer_opts.place_static_move_prob.size(), 0); + move_type_stat.accepted_moves.resize(device_ctx.logical_block_types.size() * placer_opts.place_static_move_prob.size(), 0); + move_type_stat.rejected_moves.resize(device_ctx.logical_block_types.size() * placer_opts.place_static_move_prob.size(), 0); /* Get the first range limiter */ first_rlim = (float)max(device_ctx.grid.width() - 1, @@ -1443,8 +1444,8 @@ static e_move_result try_swap(const t_annealing_state* state, crit_params.crit_exponent = state->crit_exponent; crit_params.crit_limit = placer_opts.place_crit_limit; - e_move_type move_type = e_move_type::UNIFORM; //move type number - t_logical_block_type move_blk_type; //blk type that is chosen to be moved by the agent + // move type and block type chosen by the agent + t_propose_action proposed_action{e_move_type::UNIFORM, -1}; num_ts_called++; @@ -1477,19 +1478,19 @@ static e_move_result try_swap(const t_annealing_state* state, //When manual move toggle button is active, the manual move window asks the user for input. 
if (manual_move_enabled) { #ifndef NO_GRAPHICS - create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, move_type, rlim, placer_opts, criticalities); + create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, proposed_action.move_type, rlim, placer_opts, criticalities); #endif //NO_GRAPHICS } else if (router_block_move) { // generate a move where two random router blocks are swapped create_move_outcome = propose_router_swap(blocks_affected, rlim); - move_type = e_move_type::UNIFORM; + proposed_action.move_type = e_move_type::UNIFORM; } else { //Generate a new move (perturbation) used to explore the space of possible placements - create_move_outcome = move_generator.propose_move(blocks_affected, move_type, move_blk_type, rlim, placer_opts, criticalities); + create_move_outcome = move_generator.propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); } - if (move_blk_type.index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.blk_type_moves[(move_blk_type.index * (placer_opts.place_static_move_prob.size())) + (int)move_type]; + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stat.blk_type_moves[(proposed_action.logical_blk_type_index * (placer_opts.place_static_move_prob.size())) + (int)proposed_action.move_type]; } LOG_MOVE_STATS_PROPOSED(t, blocks_affected); @@ -1623,8 +1624,8 @@ static e_move_result try_swap(const t_annealing_state* state, /* Update clb data structures since we kept the move. 
*/ commit_move_blocks(blocks_affected); - if (move_blk_type.index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.accepted_moves[(move_blk_type.index * (placer_opts.place_static_move_prob.size())) + (int)move_type]; + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stat.accepted_moves[(proposed_action.logical_blk_type_index * (placer_opts.place_static_move_prob.size())) + (int)proposed_action.move_type]; } if (noc_opts.noc) { commit_noc_costs(); @@ -1676,8 +1677,8 @@ static e_move_result try_swap(const t_annealing_state* state, revert_td_cost(blocks_affected); } - if (move_blk_type.index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.rejected_moves[(move_blk_type.index * (placer_opts.place_static_move_prob.size())) + (int)move_type]; + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stat.rejected_moves[(proposed_action.logical_blk_type_index * (placer_opts.place_static_move_prob.size())) + (int)proposed_action.move_type]; } /* Revert the traffic flow routes within the NoC*/ if (noc_opts.noc) { @@ -3254,32 +3255,30 @@ static void print_placement_move_types_stats( "------------------ ----------------- ---------------- ---------------- --------------- ------------ \n"); float total_moves = 0; - for (size_t iaction = 0; iaction < move_type_stat.blk_type_moves.size(); iaction++) { - total_moves += move_type_stat.blk_type_moves[iaction]; + for (int blk_type_move : move_type_stat.blk_type_moves) { + total_moves += blk_type_move; } auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - std::string move_name; - int agent_type = 0; int count = 0; - int num_of_avail_moves = move_type_stat.blk_type_moves.size() / get_num_agent_types(); + int num_of_avail_moves = 
move_type_stat.blk_type_moves.size() / device_ctx.logical_block_types.size(); //Print placement information for each block type - for (auto itype : device_ctx.logical_block_types) { + for (const auto& itype : device_ctx.logical_block_types) { //Skip non-existing block types in the netlist - if (itype.index == 0 || cluster_ctx.clb_nlist.blocks_per_type(itype).size() == 0) { + if (itype.index == 0 || cluster_ctx.clb_nlist.blocks_per_type(itype).empty()) { continue; } count = 0; for (int imove = 0; imove < num_of_avail_moves; imove++) { - move_name = move_type_to_string(e_move_type(imove)); - moves = move_type_stat.blk_type_moves[agent_type * num_of_avail_moves + imove]; + const auto& move_name = move_type_to_string(e_move_type(imove)); + moves = move_type_stat.blk_type_moves[itype.index * num_of_avail_moves + imove]; if (moves != 0) { - accepted = move_type_stat.accepted_moves[agent_type * num_of_avail_moves + imove]; - rejected = move_type_stat.rejected_moves[agent_type * num_of_avail_moves + imove]; + accepted = move_type_stat.accepted_moves[itype.index * num_of_avail_moves + imove]; + rejected = move_type_stat.rejected_moves[itype.index * num_of_avail_moves + imove]; aborted = moves - (accepted + rejected); if (count == 0) { VTR_LOG("%-18.20s", itype.name); @@ -3294,7 +3293,6 @@ static void print_placement_move_types_stats( } count++; } - agent_type++; VTR_LOG("\n"); } VTR_LOG("\n"); @@ -3307,16 +3305,19 @@ static void calculate_reward_and_process_outcome( float timing_bb_factor, MoveGenerator& move_generator) { std::string reward_fun_string = placer_opts.place_reward_fun; - e_reward_function reward_fun = string_to_reward(reward_fun_string); + static std::optional reward_fun; + if (!reward_fun.has_value()) { + reward_fun = string_to_reward(reward_fun_string); + } if (reward_fun == BASIC) { - move_generator.process_outcome(-1 * delta_c, reward_fun); + move_generator.process_outcome(-1 * delta_c, reward_fun.value()); } else if (reward_fun == NON_PENALIZING_BASIC || 
reward_fun == RUNTIME_AWARE) { if (delta_c < 0) { - move_generator.process_outcome(-1 * delta_c, reward_fun); + move_generator.process_outcome(-1 * delta_c, reward_fun.value()); } else { - move_generator.process_outcome(0, reward_fun); + move_generator.process_outcome(0, reward_fun.value()); } } else if (reward_fun == WL_BIASED_RUNTIME_AWARE) { if (delta_c < 0) { @@ -3326,9 +3327,9 @@ static void calculate_reward_and_process_outcome( * move_outcome_stats.delta_timing_cost_norm + timing_bb_factor * move_outcome_stats.delta_bb_cost_norm); - move_generator.process_outcome(reward, reward_fun); + move_generator.process_outcome(reward, reward_fun.value()); } else { - move_generator.process_outcome(0, reward_fun); + move_generator.process_outcome(0, reward_fun.value()); } } } diff --git a/vpr/src/place/simpleRL_move_generator.cpp b/vpr/src/place/simpleRL_move_generator.cpp index 3f9c92f6ae0..17753d66a88 100644 --- a/vpr/src/place/simpleRL_move_generator.cpp +++ b/vpr/src/place/simpleRL_move_generator.cpp @@ -4,7 +4,7 @@ #include #include "vtr_random.h" - +#include "vtr_time.h" /* File-scope routines */ //a scaled and clipped exponential function static float scaled_clipped_exp(float x) { return std::exp(std::min(1000 * x, float(3.0))); } @@ -14,39 +14,9 @@ static float scaled_clipped_exp(float x) { return std::exp(std::min(1000 * x, fl * RL move generator implementation * * * * */ -SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr& agent) { - avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES); - - avail_moves[(int)e_move_type::UNIFORM] = std::make_unique(); - avail_moves[(int)e_move_type::MEDIAN] = std::make_unique(); - avail_moves[(int)e_move_type::CENTROID] = std::make_unique(); - avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique(); - avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique(); - avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique(); - avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique(); - - 
karmed_bandit_agent = std::move(agent); -} - -SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr& agent) { - avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES); - - avail_moves[(int)e_move_type::UNIFORM] = std::make_unique(); - avail_moves[(int)e_move_type::MEDIAN] = std::make_unique(); - avail_moves[(int)e_move_type::CENTROID] = std::make_unique(); - avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique(); - avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique(); - avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique(); - avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique(); - - karmed_bandit_agent = std::move(agent); -} - -e_create_move SimpleRLMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { - auto propose_action_out = karmed_bandit_agent->propose_action(); - move_type = propose_action_out.move_type; - blk_type = propose_action_out.blk_type; // can be empty to allow agent to only choose move type (pick a block randomly) - return avail_moves[(int)move_type]->propose_move(blocks_affected, move_type, blk_type, rlim, placer_opts, criticalities); +e_create_move SimpleRLMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { + proposed_action = karmed_bandit_agent->propose_action(); + return avail_moves[(int)proposed_action.move_type]->propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); } void SimpleRLMoveGenerator::process_outcome(double reward, e_reward_function reward_fun) { @@ -58,6 +28,77 @@ void SimpleRLMoveGenerator::process_outcome(double reward, e_reward_function rew * K-Armed bandit agent implementation * * * * */ +KArmedBanditAgent::KArmedBanditAgent(size_t num_moves, 
e_agent_space agent_space) + : num_available_moves_(num_moves) + , propose_blk_type_(agent_space == e_agent_space::MOVE_BLOCK_TYPE) { + std::vector available_logical_block_types = get_available_logical_blk_types_(); + num_available_types_ = available_logical_block_types.size(); + + num_available_actions_ = propose_blk_type_ ? (num_available_moves_ * num_available_types_) : num_available_moves_; + + action_logical_blk_type_.clear(); + + for (auto logical_blk_type_idx : available_logical_block_types) { + action_logical_blk_type_.push_back(logical_blk_type_idx); + } +} + +/* + * If the agent selects both move type and block type, the Q-table would look like this: + * + * +---------------+---------------+---------------+---------------+ + * | (blk0, move0) | (blk0, move1) | ............. | (blk0, moveN) | + * +---------------+---------------+---------------+---------------+ + * | (blk1, move0) | (blk1, move1) | ............. | (blk1, moveN) | + * +---------------+---------------+---------------+---------------+ + * | .. | .. | ............. | .. | + * +---------------+---------------+---------------+---------------+ + * | (blkK, move0) | (blkK, move1) | ............. | (blkK, moveN) | + * +---------------+---------------+---------------+---------------+ + * + * This means that (action_idx % num_available_moves_) specifies the move type, + * while (action_idx / num_available_moves_) determines the block type.
+ * + */ +e_move_type KArmedBanditAgent::action_to_move_type_(const size_t action_idx) { + e_move_type move_type = e_move_type::INVALID_MOVE; + + if (action_idx < num_available_actions_) { + move_type = (e_move_type)(action_idx % num_available_moves_); + } + + return move_type; +} + +int KArmedBanditAgent::action_to_blk_type_(const size_t action_idx) { + if (propose_blk_type_) { + return action_logical_blk_type_.at(action_idx / num_available_moves_); + } else { // the agent doesn't select the block type + return -1; + } +} + +std::vector KArmedBanditAgent::get_available_logical_blk_types_() { + auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + std::vector available_blk_types; + + for (const auto& logical_blk_type : device_ctx.logical_block_types) { + if (logical_blk_type.index == 0) { //ignore empty type + continue; + } + + const auto& blk_per_type = cluster_ctx.clb_nlist.blocks_per_type(logical_blk_type); + + if (!blk_per_type.empty()) { + available_blk_types.push_back(logical_blk_type.index); + } + } + + return available_blk_types; +} + void KArmedBanditAgent::process_outcome(double reward, e_reward_function reward_fun) { ++num_action_chosen_[last_action_]; if (reward_fun == RUNTIME_AWARE || reward_fun == WL_BIASED_RUNTIME_AWARE) @@ -66,9 +107,9 @@ void KArmedBanditAgent::process_outcome(double reward, e_reward_function reward_ //Determine step size float step = 0.; if (exp_alpha_ < 0.) { - step = 1.
/ num_action_chosen_[last_action_]; //Incremental average + step = 1.0f / (float)num_action_chosen_[last_action_]; //Incremental average } else if (exp_alpha_ <= 1) { - step = exp_alpha_; //Exponentially wieghted average + step = exp_alpha_; //Exponentially weighted average } else { VTR_ASSERT_MSG(false, "Invalid step size"); } @@ -92,45 +133,61 @@ void KArmedBanditAgent::write_agent_info(int last_action, double reward) { fprintf(agent_info_file_, "%d,", last_action); fprintf(agent_info_file_, "%g,", reward); - for (size_t i = 0; i < num_available_moves_ * num_available_types_; ++i) { + for (size_t i = 0; i < num_available_actions_; ++i) { fprintf(agent_info_file_, "%g,", q_[i]); } - for (size_t i = 0; i < num_available_moves_ * num_available_types_; ++i) { + for (size_t i = 0; i < num_available_actions_; ++i) { fprintf(agent_info_file_, "%zu,", num_action_chosen_[i]); } fprintf(agent_info_file_, "\n"); fflush(agent_info_file_); } +void KArmedBanditAgent::set_step(float gamma, int move_lim) { + if (gamma < 0) { + exp_alpha_ = -1; //Use sample average + } else { + // + // For an exponentially weighted average the fraction of total weight applied + // to moves which occurred > K moves ago is: + // + // gamma = (1 - alpha)^K + // + // If we treat K as the number of moves per temperature (move_lim) then gamma + // is the fraction of weight applied to moves which occurred > move_lim moves ago, + // and given a target gamma we can explicitly calculate the alpha step-size + // required by the agent: + // + // alpha = 1 - e^(log(gamma) / K) + // + float alpha = 1 - std::exp(std::log(gamma) / move_lim); + exp_alpha_ = alpha; + } +} + +int KArmedBanditAgent::agent_to_phy_blk_type(const int idx) { + return action_logical_blk_type_.at(idx); +} /* * * * * E-greedy agent implementation * * * * */ -EpsilonGreedyAgent::EpsilonGreedyAgent(size_t num_moves, float epsilon) { +EpsilonGreedyAgent::EpsilonGreedyAgent(size_t num_moves, e_agent_space agent_space, float epsilon) + :
KArmedBanditAgent(num_moves, agent_space) { set_epsilon(epsilon); - num_available_moves_ = num_moves; - num_available_types_ = 1; - init_q_scores(); -} - -EpsilonGreedyAgent::EpsilonGreedyAgent(size_t num_moves, size_t num_types, float epsilon) { - set_epsilon(epsilon); - num_available_moves_ = num_moves; - num_available_types_ = num_types; - propose_blk_type = true; - init_q_scores(); + init_q_scores_(); } EpsilonGreedyAgent::~EpsilonGreedyAgent() { if (agent_info_file_) vtr::fclose(agent_info_file_); } -void EpsilonGreedyAgent::init_q_scores() { - q_ = std::vector(num_available_moves_ * num_available_types_, 0.); - num_action_chosen_ = std::vector(num_available_moves_ * num_available_types_, 0); - cumm_epsilon_action_prob_ = std::vector(num_available_moves_ * num_available_types_, 1.0 / (num_available_moves_ * num_available_types_)); +void EpsilonGreedyAgent::init_q_scores_() { + q_ = std::vector(num_available_actions_, 0.); + num_action_chosen_ = std::vector(num_available_actions_, 0); + cumm_epsilon_action_prob_ = std::vector(num_available_actions_, 1.0 / (num_available_actions_)); //agent_info_file_ = vtr::fopen("agent_info.txt", "w"); //write agent internal q-table and actions into file for debugging purposes @@ -142,43 +199,15 @@ void EpsilonGreedyAgent::init_q_scores() { set_epsilon_action_prob(); } -void EpsilonGreedyAgent::set_step(float gamma, int move_lim) { - VTR_LOG("Setting egreedy step: %g\n", exp_alpha_); - if (gamma < 0) { - exp_alpha_ = -1; //Use sample average - } else { - // - // For an exponentially weighted average the fraction of total weight applied - // to moves which occurred > K moves ago is: - // - // gamma = (1 - alpha)^K - // - // If we treat K as the number of moves per temperature (move_lim) then gamma - // is the fraction of weight applied to moves which occurred > move_lim moves ago, - // and given a target gamma we can explicitly calculate the alpha step-size - // required by the agent: - // - // alpha = 1 - e^(log(gamma) / K) - 
// - float alpha = 1 - std::exp(std::log(gamma) / move_lim); - exp_alpha_ = alpha; - } -} - t_propose_action EpsilonGreedyAgent::propose_action() { - size_t move_type; - t_logical_block_type blk_type; - if (vtr::frand() < epsilon_) { /* Explore * With probability epsilon, choose randomly amongst all move types */ float p = vtr::frand(); auto itr = std::lower_bound(cumm_epsilon_action_prob_.begin(), cumm_epsilon_action_prob_.end(), p); auto action_type_q_pos = itr - cumm_epsilon_action_prob_.begin(); - move_type = (action_type_q_pos) % num_available_moves_; - if (propose_blk_type) { //calculate block type index only if agent is supposed to propose both move and block type - blk_type.index = action_type_q_pos / num_available_moves_; - } + //Mark the q_table location that agent used to update its value after processing the move outcome + last_action_ = action_type_q_pos; } else { /* Greedy (Exploit) @@ -186,25 +215,17 @@ t_propose_action EpsilonGreedyAgent::propose_action() { auto itr = std::max_element(q_.begin(), q_.end()); VTR_ASSERT(itr != q_.end()); auto action_type_q_pos = itr - q_.begin(); - move_type = action_type_q_pos % num_available_moves_; - if (propose_blk_type) { //calculate block type index only if agent is supposed to propose both move and block type - blk_type.index = action_type_q_pos / num_available_moves_; - } + //Mark the q_table location that agent used to update its value after processing the move outcome + last_action_ = action_type_q_pos; } - //Check the move type to be a valid move - VTR_ASSERT(move_type < num_available_moves_); - //Check the block type index to be valid type if the agent is supposed to propose block type - VTR_ASSERT((size_t)blk_type.index < num_available_types_ || !propose_blk_type); - - //Mark the q_table location that agent used to update its value after processing the move outcome - last_action_ = (!propose_blk_type) ? 
move_type : move_type + (blk_type.index * num_available_moves_); + t_propose_action proposed_action{action_to_move_type_(last_action_), + action_to_blk_type_(last_action_)}; - t_propose_action propose_action; - propose_action.move_type = (e_move_type)move_type; - propose_action.blk_type = blk_type; + //Check the move type to be a valid move + VTR_ASSERT((size_t)proposed_action.move_type < num_available_moves_); - return propose_action; + return proposed_action; } void EpsilonGreedyAgent::set_epsilon(float epsilon) { @@ -214,10 +235,10 @@ void EpsilonGreedyAgent::set_epsilon(float epsilon) { void EpsilonGreedyAgent::set_epsilon_action_prob() { //initialize to equal probabilities - std::vector epsilon_prob(num_available_moves_ * num_available_types_, 1.0 / (num_available_moves_ * num_available_types_)); + std::vector epsilon_prob(num_available_actions_, 1.0 / (num_available_actions_)); float accum = 0; - for (size_t i = 0; i < num_available_moves_ * num_available_types_; ++i) { + for (size_t i = 0; i < num_available_actions_; ++i) { accum += epsilon_prob[i]; cumm_epsilon_action_prob_[i] = accum; } @@ -228,30 +249,22 @@ void EpsilonGreedyAgent::set_epsilon_action_prob() { * Softmax agent implementation * * * * */ -SoftmaxAgent::SoftmaxAgent(size_t num_moves) { - num_available_moves_ = num_moves; - num_available_types_ = 1; - init_q_scores(); -} - -SoftmaxAgent::SoftmaxAgent(size_t num_moves, size_t num_types) { - num_available_moves_ = num_moves; - num_available_types_ = num_types; - propose_blk_type = true; - init_q_scores(); +SoftmaxAgent::SoftmaxAgent(size_t num_moves, e_agent_space agent_space) + : KArmedBanditAgent(num_moves, agent_space) { + init_q_scores_(); } SoftmaxAgent::~SoftmaxAgent() { if (agent_info_file_) vtr::fclose(agent_info_file_); } -void SoftmaxAgent::init_q_scores() { - q_ = std::vector(num_available_moves_ * num_available_types_, 0.); - exp_q_ = std::vector(num_available_moves_ * num_available_types_, 0.); - num_action_chosen_ = 
std::vector(num_available_moves_ * num_available_types_, 0); - action_prob_ = std::vector(num_available_moves_ * num_available_types_, 0.); +void SoftmaxAgent::init_q_scores_() { + q_ = std::vector(num_available_actions_, 0.); + exp_q_ = std::vector(num_available_actions_, 0.); + num_action_chosen_ = std::vector(num_available_actions_, 0); + action_prob_ = std::vector(num_available_actions_, 0.); block_type_ratio_ = std::vector(num_available_types_, 0.); - cumm_action_prob_ = std::vector(num_available_moves_ * num_available_types_); + cumm_action_prob_ = std::vector(num_available_actions_); // agent_info_file_ = vtr::fopen("agent_info.txt", "w"); //write agent internal q-table and actions into file for debugging purposes @@ -265,52 +278,36 @@ void SoftmaxAgent::init_q_scores() { * If the agent is supposed to propose both block type and move type, * it will use the block ratio to calculate action probability for each q_table entry. */ - if (propose_blk_type) { - set_block_ratio(); + if (propose_blk_type_) { + set_block_ratio_(); } - set_action_prob(); + set_action_prob_(); } t_propose_action SoftmaxAgent::propose_action() { - set_action_prob(); - - size_t move_type; - t_logical_block_type blk_type; + set_action_prob_(); float p = vtr::frand(); auto itr = std::lower_bound(cumm_action_prob_.begin(), cumm_action_prob_.end(), p); auto action_type_q_pos = itr - cumm_action_prob_.begin(); - move_type = (action_type_q_pos) % num_available_moves_; - if (propose_blk_type) { //calculate block type index only if agent is supposed to propose both move and block type - blk_type.index = action_type_q_pos / num_available_moves_; - } - //To take care that the last element in cumm_action_prob_ might be less than 1 by a small value - if ((size_t)action_type_q_pos == num_available_moves_ * num_available_types_) { - move_type = num_available_moves_ - 1; - if (propose_blk_type) { //calculate block type index only if agent is supposed to propose both move and block type - blk_type.index 
= num_available_types_ - 1; - } - } - - //Check the move type to be a valid move - VTR_ASSERT(move_type < num_available_moves_); - //Check the block type index to be valid type if the agent is supposed to propose block type - VTR_ASSERT((size_t)blk_type.index < num_available_types_ || !propose_blk_type); + last_action_ = std::min((size_t)action_type_q_pos, num_available_actions_ - 1); - //Mark the q_table location that agent used to update its value after processing the move outcome - last_action_ = (!propose_blk_type) ? move_type : move_type + (blk_type.index * num_available_moves_); + t_propose_action proposed_action{action_to_move_type_(last_action_), + action_to_blk_type_(last_action_)}; - t_propose_action propose_action; - propose_action.move_type = (e_move_type)move_type; - propose_action.blk_type = blk_type; + //Check the move type to be a valid move + VTR_ASSERT((size_t)proposed_action.move_type < num_available_moves_); - return propose_action; + return proposed_action; } -void SoftmaxAgent::set_block_ratio() { +void SoftmaxAgent::set_block_ratio_() { auto& cluster_ctx = g_vpr_ctx.clustering(); - int num_total_blocks = cluster_ctx.clb_nlist.blocks().size(); + size_t num_total_blocks = cluster_ctx.clb_nlist.blocks().size(); + + // allocate enough space for available block types in the netlist + block_type_ratio_.resize(num_available_types_); /* Calculate ratio of each block as : (# blocks of each type / total blocks). * Each block type can have "num_available_moves_" different moves. 
Hence, @@ -318,23 +315,23 @@ void SoftmaxAgent::set_block_ratio() { */ for (size_t itype = 0; itype < num_available_types_; itype++) { t_logical_block_type blk_type; - blk_type.index = convert_agent_to_phys_blk_type(itype); + blk_type.index = agent_to_phy_blk_type(itype); auto num_blocks = cluster_ctx.clb_nlist.blocks_per_type(blk_type).size(); block_type_ratio_[itype] = (float)num_blocks / num_total_blocks; block_type_ratio_[itype] /= num_available_moves_; } } -void SoftmaxAgent::set_action_prob() { +void SoftmaxAgent::set_action_prob_() { //calculate the scaled and clipped exponential function for the estimated q value for each action std::transform(q_.begin(), q_.end(), exp_q_.begin(), scaled_clipped_exp); //calculate the sum of all scaled clipped exponential q values - float sum_q = accumulate(exp_q_.begin(), exp_q_.end(), 0.0); + float sum_q = std::accumulate(exp_q_.begin(), exp_q_.end(), 0.0); //calculate the probability of each action as the ratio of scaled_clipped_exp(action(i))/sum(scaled_clipped_exponential) - for (size_t i = 0; i < num_available_moves_ * num_available_types_; ++i) { - if (propose_blk_type) { + for (size_t i = 0; i < num_available_actions_; ++i) { + if (propose_blk_type_) { //calculate block type index based on its location on q_table int blk_ratio_index = (int)i / num_available_moves_; action_prob_[i] = (exp_q_[i] / sum_q) * block_type_ratio_[blk_ratio_index]; @@ -345,7 +342,7 @@ void SoftmaxAgent::set_action_prob() { // normalize all the action probabilities to guarantee the sum(all action probs) = 1 float sum_prob = std::accumulate(action_prob_.begin(), action_prob_.end(), 0.0); - if (propose_blk_type) { + if (propose_blk_type_) { std::transform(action_prob_.begin(), action_prob_.end(), action_prob_.begin(), [sum_prob](float x) { return x * (1 / sum_prob); }); } else { @@ -356,30 +353,8 @@ void SoftmaxAgent::set_action_prob() { // calculate the accumulative action probability of each action // e.g. 
if we have 5 actions with equal probability of 0.2, the cumm_action_prob will be {0.2,0.4,0.6,0.8,1.0} float accum = 0; - for (size_t i = 0; i < num_available_moves_ * num_available_types_; ++i) { + for (size_t i = 0; i < num_available_actions_; ++i) { accum += action_prob_[i]; cumm_action_prob_[i] = accum; } -} - -void SoftmaxAgent::set_step(float gamma, int move_lim) { - if (gamma < 0) { - exp_alpha_ = -1; //Use sample average - } else { - // - // For an exponentially weighted average the fraction of total weight applied - // to moves which occured > K moves ago is: - // - // gamma = (1 - alpha)^K - // - // If we treat K as the number of moves per temperature (move_lim) then gamma - // is the fraction of weight applied to moves which occured > move_lim moves ago, - // and given a target gamma we can explicitly calcualte the alpha step-size - // required by the agent: - // - // alpha = 1 - e^(log(gamma) / K) - // - float alpha = 1 - std::exp(std::log(gamma) / move_lim); - exp_alpha_ = alpha; - } -} +} \ No newline at end of file diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h index f8f16602881..9ded69055d5 100644 --- a/vpr/src/place/simpleRL_move_generator.h +++ b/vpr/src/place/simpleRL_move_generator.h @@ -14,7 +14,8 @@ */ class KArmedBanditAgent { public: - virtual ~KArmedBanditAgent() {} + KArmedBanditAgent(size_t num_moves, e_agent_space agent_space); + virtual ~KArmedBanditAgent() = default; /** * @brief Choose a move type to perform and a block type that move should be performed with based on Q-table @@ -40,20 +41,75 @@ class KArmedBanditAgent { */ void write_agent_info(int last_action, double reward); + /** + * @brief Set step size for q-table updates + * + * @param gamma Controls how quickly the agent's memory decays, can be specified by the command-line option "--place_agent_gamma" + * Gamma default value is 0.05. 
+ * @param move_lim Number of moves per temperature + */ + void set_step(float gamma, int move_lim); + + protected: + /** + * @brief Converts an action index to a move type. + * + * @param action_idx Specifies which action is selected by the agent. + * + * @return The move type associated with the selected action. + */ + inline e_move_type action_to_move_type_(size_t action_idx); + + /** + * @brief Converts an action index to a logical block type index. + * + * @param action_idx Specifies which action is selected by the agent. + * + * @return The logical block type index associated with the selected action. + */ + inline int action_to_blk_type_(size_t action_idx); + + /** + * @brief Converts an agent block type index to a logical block type index. + * Q-table only contains entries for block types that exist in the netlist. + * Agent block type index ranges from 0 to num_types_in_netlist-1. The agent + * chooses an agent block type and a move type. This function is used to + * convert the agent block type index (only usable within this class) to + * a logical block type index (usable by different move generators). + * + * @param idx Specifies the index by which this class refers to a logical block type index. + * + * @return The referred logical block type index. + */ + inline int agent_to_phy_blk_type(int idx); + protected: float exp_alpha_ = -1; //Step size for q_ updates (< 0 implies use incremental average) size_t num_available_moves_; //Number of move types that agent can choose from to perform - size_t num_available_types_; //Number of block types that agent can choose to perform the move with - bool propose_blk_type = false; //Check if agent should propose both move and block type or only move type + size_t num_available_types_; //Number of block types that exist in the netlist. Agent may not choose the block type.
+ size_t num_available_actions_; //Total number of available actions + bool propose_blk_type_ = false; //Check if agent should propose both move and block type or only move type std::vector num_action_chosen_; //Number of times each arm has been pulled (n) std::vector q_; //Estimated value of each arm (Q) - size_t last_action_ = 0; //type of the last action (move type) proposed + size_t last_action_; //type of the last action (move type) proposed /* Ratios of the average runtime to calculate each move type */ /* These ratios are useful for different reward functions * * The vector is calculated by averaging many runs on different circuits */ std::vector time_elapsed_{1.0, 3.6, 5.4, 2.5, 2.1, 0.8, 2.2}; FILE* agent_info_file_ = nullptr; + + private: + /** + * @brief Iterates over all logical block types and check whether they exist in the + * netlist. Then, returns the logical block type indices found in the netlist. + * + * @return A vector containing all logical block type indices that exist in the netlist. + */ + static std::vector get_available_logical_blk_types_(); + + private: + std::vector action_logical_blk_type_; }; /** @@ -65,9 +121,8 @@ class KArmedBanditAgent { */ class EpsilonGreedyAgent : public KArmedBanditAgent { public: - EpsilonGreedyAgent(size_t num_moves, float epsilon); - EpsilonGreedyAgent(size_t num_moves, size_t num_types, float epsilon); - ~EpsilonGreedyAgent(); + EpsilonGreedyAgent(size_t num_moves, e_agent_space agent_space, float epsilon); + ~EpsilonGreedyAgent() override; t_propose_action propose_action() override; //Returns the type of the next action as well as the block type the agent wishes to perform @@ -85,19 +140,11 @@ class EpsilonGreedyAgent : public KArmedBanditAgent { */ void set_epsilon_action_prob(); - /** - * @brief Set step size for q-table updates - * - * @param gamma Controls how quickly the agent's memory decays, can be specified by the command-line option "--place_agent_gamma" - * Gamma default value is 0.05. 
- * @param move_lim Number of moves per temperature - */ - void set_step(float gamma, int move_lim); - + private: /** * @brief Initialize agent's Q-table and internal variable to zero (RL-agent learns everything throughout the placement run and has no prior knowledge) */ - void init_q_scores(); + void init_q_scores_(); private: float epsilon_ = 0.1; //How often to perform a non-greedy exploration action @@ -113,39 +160,29 @@ class EpsilonGreedyAgent : public KArmedBanditAgent { */ class SoftmaxAgent : public KArmedBanditAgent { public: - SoftmaxAgent(size_t num_moves); - SoftmaxAgent(size_t num_moves, size_t num_types); - ~SoftmaxAgent(); + SoftmaxAgent(size_t num_moves, e_agent_space agent_space); + ~SoftmaxAgent() override; //void process_outcome(double reward, std::string reward_fun) override; //Updates the agent based on the reward of the last proposed action t_propose_action propose_action() override; //Returns the type of the next action as well as the block type the agent wishes to perform - public: + private: + /** + * @brief Initialize agent's Q-table and internal variable to zero (RL-agent learns everything throughout the placement run and has no prior knowledge) + */ + void init_q_scores_(); + /** * @brief Calculate the fraction of total netlist blocks for each agent block type and will be used by the "set_action_prob" function. */ - void set_block_ratio(); + void set_block_ratio_(); /** * @brief Set action probability for all available actions. * If agent only proposes move type, the action probabilities would be equal for all move types at the beginning. * If agent proposes both move and block type, the action_prob for each action would be based on its block type count in the netlist. */ - void set_action_prob(); - - /** - * @brief Set step size for q-table updates - * - * @param gamma Controls how quickly the agent's memory decays, can be specified by the command-line option "--place_agent_gamma" - * Gamma default value is 0.05. 
- * @param move_lim Number of moves per temperature - */ - void set_step(float gamma, int move_lim); - - /** - * @brief Initialize agent's Q-table and internal variable to zero (RL-agent learns everything throughout the placement run and has no prior knowledge) - */ - void init_q_scores(); + void set_action_prob_(); private: std::vector exp_q_; //The clipped and scaled exponential of the estimated Q value for each action @@ -167,14 +204,37 @@ class SimpleRLMoveGenerator : public MoveGenerator { std::unique_ptr karmed_bandit_agent; // a pointer to the specific agent used (e.g. Softmax) public: - // constructors using a pointer to the agent used - SimpleRLMoveGenerator(std::unique_ptr& agent); - SimpleRLMoveGenerator(std::unique_ptr& agent); + // constructor using a pointer to the agent used + // the constructor + /** + * @brief Constructs an RL move generator using the passed agent + * + * @param agent std::unique_ptr to the agent. Only EpsilonGreedyAgent and SoftmaxAgent types are accepted + * by the constructor. If other types are passed, a compile error would be thrown. 
+ */ + template::value || std::is_same::value>::type> + explicit SimpleRLMoveGenerator(std::unique_ptr& agent); // Updates affected_blocks with the proposed move, while respecting the current rlim - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; - // Recieves feedback about the outcome of the previously proposed move - void process_outcome(double reward, e_reward_function reward_fun); + // Receives feedback about the outcome of the previously proposed move + void process_outcome(double reward, e_reward_function reward_fun) override; }; + +template +SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr& agent) { + avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES); + + avail_moves[(int)e_move_type::UNIFORM] = std::make_unique(); + avail_moves[(int)e_move_type::MEDIAN] = std::make_unique(); + avail_moves[(int)e_move_type::CENTROID] = std::make_unique(); + avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique(); + avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique(); + avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique(); + avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique(); + + karmed_bandit_agent = std::move(agent); +} #endif diff --git a/vpr/src/place/static_move_generator.cpp b/vpr/src/place/static_move_generator.cpp index 7d2d3101587..e927a95caaa 100644 --- a/vpr/src/place/static_move_generator.cpp +++ b/vpr/src/place/static_move_generator.cpp @@ -26,17 +26,17 @@ void StaticMoveGenerator::initialize_move_prob(const std::vector& prob) { total_prob = cumm_move_probs[cumm_move_probs.size() - 1]; } -e_create_move 
StaticMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +e_create_move StaticMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { float rand_num = vtr::frand() * total_prob; for (size_t i = 0; i < cumm_move_probs.size(); i++) { if (rand_num <= cumm_move_probs[i]) { - move_type = (e_move_type)i; - return avail_moves[i]->propose_move(blocks_affected, move_type, blk_type, rlim, placer_opts, criticalities); + proposed_action.move_type = (e_move_type)i; + return avail_moves[i]->propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); } } VTR_ASSERT_MSG(false, vtr::string_fmt("During static probability move selection, random number (%g) exceeded total expected probabaility (%g)", rand_num, total_prob).c_str()); //Unreachable - move_type = (e_move_type)(avail_moves.size() - 1); - return avail_moves[avail_moves.size() - 1]->propose_move(blocks_affected, move_type, blk_type, rlim, placer_opts, criticalities); + proposed_action.move_type = (e_move_type)(avail_moves.size() - 1); + return avail_moves[avail_moves.size() - 1]->propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); } diff --git a/vpr/src/place/static_move_generator.h b/vpr/src/place/static_move_generator.h index b9c134360d5..e971dd02954 100644 --- a/vpr/src/place/static_move_generator.h +++ b/vpr/src/place/static_move_generator.h @@ -24,6 +24,6 @@ class StaticMoveGenerator : public MoveGenerator { public: StaticMoveGenerator(const std::vector& prob); - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* 
criticalities); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp index c979295e4f0..ae5b87cf28d 100644 --- a/vpr/src/place/uniform_move_generator.cpp +++ b/vpr/src/place/uniform_move_generator.cpp @@ -3,9 +3,9 @@ #include "place_constraints.h" #include "move_utils.h" -e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { +e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) { //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr); if (!b_from) { //No movable block found return e_create_move::ABORT; diff --git a/vpr/src/place/uniform_move_generator.h b/vpr/src/place/uniform_move_generator.h index 34cdbfcce41..0ea4a8a9d8d 100644 --- a/vpr/src/place/uniform_move_generator.h +++ b/vpr/src/place/uniform_move_generator.h @@ -9,7 +9,7 @@ * a range limit centered on from_block in the compressed block grid space */ class UniformMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& 
proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override; }; #endif diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp index 4e968680cba..c98d0fcfc51 100644 --- a/vpr/src/place/weighted_centroid_move_generator.cpp +++ b/vpr/src/place/weighted_centroid_move_generator.cpp @@ -4,9 +4,9 @@ #include "place_constraints.h" #include "move_utils.h" -e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr); if (!b_from) { //No movable block found return e_create_move::ABORT; @@ -23,10 +23,9 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); - t_range_limiters range_limiters; - range_limiters.original_rlim = rlim; - range_limiters.first_rlim = place_move_ctx.first_rlim; - range_limiters.dm_rlim = placer_opts.place_dm_rlim; + t_range_limiters range_limiters{rlim, + place_move_ctx.first_rlim, + placer_opts.place_dm_rlim}; t_pl_loc to, centroid; diff --git a/vpr/src/place/weighted_centroid_move_generator.h b/vpr/src/place/weighted_centroid_move_generator.h index 6d6dc20359a..7aea1b6941c 100644 --- 
a/vpr/src/place/weighted_centroid_move_generator.h +++ b/vpr/src/place/weighted_centroid_move_generator.h @@ -13,7 +13,7 @@ * "Learn to Place: FPGA Placement using Reinforcement Learning and Directed Moves", ICFPT2020 */ class WeightedCentroidMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp index a5e59fec044..152f0c2d731 100644 --- a/vpr/src/place/weighted_median_move_generator.cpp +++ b/vpr/src/place/weighted_median_move_generator.cpp @@ -9,9 +9,9 @@ static void get_bb_cost_for_net_excluding_block(ClusterNetId net_id, ClusterBlockId block_id, ClusterPinId moving_pin_id, const PlacerCriticalities* criticalities, t_bb_cost* coords, bool& skip_net); -e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { +e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) { //Find a movable block based on blk_type - ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL); + ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr); if (!b_from) { //No movable block found return e_create_move::ABORT; @@ -68,7 +68,7 @@ e_create_move 
WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymax.edge); } - if ((place_move_ctx.X_coord.size() == 0) || (place_move_ctx.Y_coord.size() == 0)) + if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) return e_create_move::ABORT; //calculate the weighted median region @@ -91,10 +91,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& limit_coords.ymax = place_move_ctx.Y_coord[floor((place_move_ctx.Y_coord.size() - 1) / 2) + 1]; } - t_range_limiters range_limiters; - range_limiters.original_rlim = rlim; - range_limiters.dm_rlim = placer_opts.place_dm_rlim; - range_limiters.first_rlim = place_move_ctx.first_rlim; + t_range_limiters range_limiters{rlim, + place_move_ctx.first_rlim, + placer_opts.place_dm_rlim}; t_pl_loc w_median_point; w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; diff --git a/vpr/src/place/weighted_median_move_generator.h b/vpr/src/place/weighted_median_move_generator.h index 0ebc2d20b1c..c0be89b7c5f 100644 --- a/vpr/src/place/weighted_median_move_generator.h +++ b/vpr/src/place/weighted_median_move_generator.h @@ -13,7 +13,7 @@ * "Learn to Place: FPGA Placement using Reinforcement Learning and Directed Moves", ICFPT2020 */ class WeightedMedianMoveGenerator : public MoveGenerator { - e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities); + e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override; }; #endif diff --git a/vpr/src/route/channel_stats.cpp b/vpr/src/route/channel_stats.cpp index 026a453232c..e5a2f1703e2 100644 --- 
a/vpr/src/route/channel_stats.cpp +++ b/vpr/src/route/channel_stats.cpp @@ -20,8 +20,8 @@ void print_channel_stats(bool is_flat) { histogram.emplace_back(0.9, 1.0); histogram.emplace_back(1.0, std::numeric_limits::infinity()); - auto chanx_usage = calculate_routing_usage(CHANX, is_flat); - auto chany_usage = calculate_routing_usage(CHANY, is_flat); + auto chanx_usage = calculate_routing_usage(CHANX, is_flat, true); + auto chany_usage = calculate_routing_usage(CHANY, is_flat, true); auto chanx_avail = calculate_routing_avail(CHANX); auto chany_avail = calculate_routing_avail(CHANY); diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index 972e5bf31e7..eaa4c85ff26 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -22,15 +22,7 @@ Connection_based_routing_resources::Connection_based_routing_resources(const Net * reached_rt_sinks will also reserve enough space, but instead of * indices, it will store the pointers to route tree nodes */ - // can have as many targets as sink pins (total number of pins - SOURCE pin) - // supposed to be used as persistent vector growing with push_back and clearing at the start of each net routing iteration - auto max_sink_pins_per_net = std::max(get_max_pins_per_net(net_list_) - 1, 0); - remaining_targets.reserve(max_sink_pins_per_net); - reached_rt_sinks.reserve(max_sink_pins_per_net); - size_t routing_num_nets = net_list_.nets().size(); - remaining_targets.resize(routing_num_nets); - reached_rt_sinks.resize(routing_num_nets); lower_bound_connection_delay.resize(routing_num_nets); forcible_reroute_connection_flag.resize(routing_num_nets); @@ -114,7 +106,7 @@ bool Connection_based_routing_resources::forcibly_reroute_connections(float max_ forcible_reroute_connection_flag[net_id][rr_sink_node] = true; // note that we don't set forcible_reroute_connection_flag to false when the converse is true - // resetting back to false will be done 
during tree pruning, after the sink has been legally reached + // resetting back to false will be done during tree pruning, after the sink has been legally reached [!] any_connection_rerouted = true; profiling::mark_for_forced_reroute(); diff --git a/vpr/src/route/connection_based_routing.h b/vpr/src/route/connection_based_routing.h index 59f0edf2936..0f0faaaace5 100644 --- a/vpr/src/route/connection_based_routing.h +++ b/vpr/src/route/connection_based_routing.h @@ -14,35 +14,10 @@ // pruning the route tree of large fanouts. Instead of rerouting to each sink of a congested net, // reroute only the connections to the ones that did not have a legal connection the previous time class Connection_based_routing_resources { - /** Holds remaining target pin indices (if this net has a RouteTree from the previous - * iteration, its prune() call will update this) (should be moved into RouteTree) */ - vtr::vector> remaining_targets; - - /** Holds RRNodeIds of legally reached sinks. Used to build the external rt_node_to_sink - * lookup. 
(should be moved into RouteTree)*/ - vtr::vector> reached_rt_sinks; - public: Connection_based_routing_resources(const Netlist<>& net_list, const vtr::vector>& net_terminals, bool is_flat); - // adding to the resources when they are reached during pruning - // mark rr sink node as something that still needs to be reached - void toreach_rr_sink(ParentNetId net_id, int rr_sink_node) { - remaining_targets[net_id].push_back(rr_sink_node); - } - // mark rt sink node as something that has been legally reached - void reached_rt_sink(ParentNetId net_id, RRNodeId rt_sink) { - reached_rt_sinks[net_id].push_back(rt_sink); - } - - // get a handle on the resources - std::vector& get_remaining_targets(ParentNetId net_id) { - return remaining_targets[net_id]; - } - std::vector& get_reached_rt_sinks(ParentNetId net_id) { - return reached_rt_sinks[net_id]; - } bool sanity_check_lookup() const; @@ -87,11 +62,6 @@ class Connection_based_routing_resources { //Updates the connection delay lower bound (if less than current best found) void update_lower_bound_connection_delay(ParentNetId net, int ipin, float delay); - void prepare_routing_for_net(ParentNetId net_id) { - remaining_targets[net_id].clear(); - reached_rt_sinks[net_id].clear(); - } - // get a handle on the resources float get_stable_critical_path_delay() const { return last_stable_critical_path_delay; } diff --git a/vpr/src/route/partition_tree.cpp b/vpr/src/route/partition_tree.cpp index f896d93bc94..d3d895493b5 100644 --- a/vpr/src/route/partition_tree.cpp +++ b/vpr/src/route/partition_tree.cpp @@ -1,11 +1,12 @@ #include "partition_tree.h" +#include #include PartitionTree::PartitionTree(const Netlist<>& netlist) { const auto& device_ctx = g_vpr_ctx.device(); auto all_nets = std::vector(netlist.nets().begin(), netlist.nets().end()); - _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width(), device_ctx.grid.height()); + _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, 
device_ctx.grid.height() - 1); } std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2) { @@ -15,116 +16,122 @@ std::unique_ptr PartitionTree::build_helper(const Netlist<>& const auto& route_ctx = g_vpr_ctx.routing(); auto out = std::make_unique(); - /* Find best cutline. In ParaDRo this is done using prefix sums, but - * life is too short to implement them, therefore I'm just doing a linear search, - * and the complexity is O((fpga width + height) * #nets * log2(w+h * #nets)). - * What we are searching for is the cutline with the most balanced workload (# of fanouts) - * on the sides. */ - int left, right, mine; - int score; - /* TODO: maybe put all of this into a tuple or struct? */ - int best_score = std::numeric_limits::max(); - int best_pos = -1, best_left = -1, best_right = -1; - enum { X, - Y } best_axis - = X; + /* Build ParaDRo-ish prefix sum lookup for each bin (coordinate) in the device. + * Do this for every step with only given nets, because each cutline takes some nets out + * of the game, so if we just built a global lookup it wouldn't yield accurate results. + * + * VPR's bounding boxes include the borders (see ConnectionRouter::timing_driven_expand_neighbour()) + * so try to include x=bb.xmax, y=bb.ymax etc. when calculating things. */ + int W = x2 - x1 + 1; + int H = y2 - y1 + 1; - for (int x = x1 + 1; x < x2; x++) { - left = right = mine = 0; - for (auto net_id : nets) { - t_bb bb = route_ctx.route_bb[net_id]; - size_t fanout = netlist.net_sinks(net_id).size(); - if (bb.xmin < x && bb.xmax < x) { - left += fanout; - } else if (bb.xmin > x && bb.xmax > x) { - right += fanout; - } else if (bb.xmin <= x && bb.xmax >= x) { - mine += fanout; - } else { - VTR_ASSERT(false); /* unreachable */ - } + VTR_ASSERT(W > 1 && H > 1); + /* Cutlines are placed between integral coordinates. 
+ * For instance, x_total_before[0] assumes a cutline at x=0.5, so fanouts at x=0 are included but not + * x=1. It's similar for x_total_after[0], which excludes fanouts at x=0 and includes x=1. + * Note that we have W-1 possible cutlines for a W-wide box. */ + std::vector x_total_before(W - 1, 0), x_total_after(W - 1, 0); + std::vector y_total_before(H - 1, 0), y_total_after(H - 1, 0); + + for (auto net_id : nets) { + t_bb bb = route_ctx.route_bb[net_id]; + size_t fanouts = netlist.net_sinks(net_id).size(); + + /* Inclusive start and end coords of the bbox relative to x1. Clamp to [x1, x2]. */ + int x_start = std::max(x1, bb.xmin) - x1; + int x_end = std::min(bb.xmax, x2) - x1; + /* Fill in the lookups assuming a cutline at x + 0.5. */ + for (int x = x_start; x < W - 1; x++) { + x_total_before[x] += fanouts; + } + for (int x = 0; x < x_end; x++) { + x_total_after[x] += fanouts; + } + int y_start = std::max(y1, bb.ymin) - y1; + int y_end = std::min(bb.ymax, y2) - y1; + for (int y = y_start; y < H - 1; y++) { + y_total_before[y] += fanouts; } - score = abs(left - right); + for (int y = 0; y < y_end; y++) { + y_total_after[y] += fanouts; + } + } + + int best_score = std::numeric_limits::max(); + float best_pos = std::numeric_limits::quiet_NaN(); + Axis best_axis = Axis::X; + + int max_x_before = x_total_before[W - 2]; + int max_x_after = x_total_after[0]; + for (int x = 0; x < W - 1; x++) { + int before = x_total_before[x]; + int after = x_total_after[x]; + if (before == max_x_before || after == max_x_after) /* Cutting here would leave no nets to the left or right */ + continue; + int score = abs(x_total_before[x] - x_total_after[x]); if (score < best_score) { best_score = score; - best_left = left; - best_right = right; - best_pos = x; - best_axis = X; + best_pos = x1 + x + 0.5; /* Lookups are relative to (x1, y1) */ + best_axis = Axis::X; } } - for (int y = y1 + 1; y < y2; y++) { - left = right = mine = 0; - for (auto net_id : nets) { - t_bb bb = 
route_ctx.route_bb[net_id]; - size_t fanout = netlist.net_sinks(net_id).size(); - if (bb.ymin < y && bb.ymax < y) { - left += fanout; - } else if (bb.ymin > y && bb.ymax > y) { - right += fanout; - } else if (bb.ymin <= y && bb.ymax >= y) { - mine += fanout; - } else { - VTR_ASSERT(false); /* unreachable */ - } - } - score = abs(left - right); + + int max_y_before = y_total_before[H - 2]; + int max_y_after = y_total_after[0]; + for (int y = 0; y < H - 1; y++) { + int before = y_total_before[y]; + int after = y_total_after[y]; + if (before == max_y_before || after == max_y_after) /* Cutting here would leave no nets to the left or right (sideways) */ + continue; + int score = abs(y_total_before[y] - y_total_after[y]); if (score < best_score) { best_score = score; - best_left = left; - best_right = right; - best_pos = y; - best_axis = Y; + best_pos = y1 + y + 0.5; /* Lookups are relative to (x1, y1) */ + best_axis = Axis::Y; } } - /* If one of the sides has 0 nets in the best arrangement, - * there's no use in partitioning this: no parallelism comes out of it. */ - if (best_left == 0 || best_right == 0) { - out->nets = std::move(nets); + /* Couldn't find a cutline: all cutlines result in a one-way cut */ + if (std::isnan(best_pos)) { + out->nets = nets; /* We hope copy elision is smart enough to optimize this stuff out */ return out; } - /* Populate net IDs on each side - * and call next level of build_partition_trees. 
*/ + /* Populate net IDs on each side and call next level of build_x */ std::vector left_nets, right_nets, my_nets; - if (best_axis == X) { + if (best_axis == Axis::X) { for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; - if (bb.xmin < best_pos && bb.xmax < best_pos) { + if (bb.xmax < best_pos) { left_nets.push_back(net_id); - } else if (bb.xmin > best_pos && bb.xmax > best_pos) { + } else if (bb.xmin > best_pos) { right_nets.push_back(net_id); - } else if (bb.xmin <= best_pos && bb.xmax >= best_pos) { - my_nets.push_back(net_id); } else { - VTR_ASSERT(false); /* unreachable */ + my_nets.push_back(net_id); } } - out->left = build_helper(netlist, left_nets, x1, y1, best_pos, y2); - out->right = build_helper(netlist, right_nets, best_pos, y2, x2, y2); + out->left = build_helper(netlist, left_nets, x1, y1, std::floor(best_pos), y2); + out->right = build_helper(netlist, right_nets, std::floor(best_pos + 1), y1, x2, y2); } else { - VTR_ASSERT(best_axis == Y); + VTR_ASSERT(best_axis == Axis::Y); for (auto net_id : nets) { t_bb bb = route_ctx.route_bb[net_id]; - if (bb.ymin < best_pos && bb.ymax < best_pos) { + if (bb.ymax < best_pos) { left_nets.push_back(net_id); - } else if (bb.ymin > best_pos && bb.ymax > best_pos) { + } else if (bb.ymin > best_pos) { right_nets.push_back(net_id); - } else if (bb.ymin <= best_pos && bb.ymax >= best_pos) { - my_nets.push_back(net_id); } else { - VTR_ASSERT(false); /* unreachable */ + my_nets.push_back(net_id); } } - out->left = build_helper(netlist, left_nets, x1, best_pos, x2, y2); - out->right = build_helper(netlist, right_nets, x1, y1, x2, best_pos); + out->left = build_helper(netlist, left_nets, x1, y1, x2, std::floor(best_pos)); + out->right = build_helper(netlist, right_nets, x1, std::floor(best_pos + 1), x2, y2); } - out->nets = std::move(my_nets); + out->nets = my_nets; out->cutline_axis = best_axis; out->cutline_pos = best_pos; return out; diff --git a/vpr/src/route/partition_tree.h 
b/vpr/src/route/partition_tree.h index 97988d5fdbb..08eb668a88f 100644 --- a/vpr/src/route/partition_tree.h +++ b/vpr/src/route/partition_tree.h @@ -3,6 +3,28 @@ #include "connection_router.h" #include "router_stats.h" +#include +#include +#include +#include + +#ifdef VPR_USE_TBB +# include +#endif + +/** Self-descriptive */ +enum class Axis { X, + Y }; + +/** Which side of a line? */ +enum class Side { LEFT = 0, + RIGHT = 1 }; + +/** Invert side */ +inline Side operator!(const Side& rhs) { + return Side(!size_t(rhs)); +} + /** Routing iteration results per thread. (for a subset of the input netlist) */ struct RouteIterResults { /** Are there any connections impossible to route due to a disconnected rr_graph? */ @@ -38,11 +60,12 @@ class PartitionTreeNode { bool is_routable = false; /** Net IDs for which timing_driven_route_net() actually got called */ std::vector rerouted_nets; - - /* debug stuff */ - int cutline_axis = -1; - int cutline_pos = -1; - std::vector exec_times; + /* Axis of the cutline. */ + Axis cutline_axis = Axis::X; + /* Position of the cutline. It's a float, because cutlines are considered to be "between" integral coordinates. */ + float cutline_pos = std::numeric_limits::quiet_NaN(); + /* Bounding box of *this* node. (The cutline cuts this box) */ + t_bb bb; }; /** Holds the root PartitionTreeNode and exposes top level operations. */ @@ -64,3 +87,34 @@ class PartitionTree { std::unique_ptr _root; std::unique_ptr build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2); }; + +#ifdef VPR_DEBUG_PARTITION_TREE +/** Log PartitionTree-related messages. Can handle multiple threads. 
*/ +class PartitionTreeDebug { + public: +# ifdef VPR_USE_TBB + static inline tbb::concurrent_vector lines; +# else + static inline std::vector lines; +# endif + /** Add msg to the log buffer (with a thread ID header) */ + static inline void log(std::string msg) { + auto thread_id = std::hash()(std::this_thread::get_id()); + lines.push_back("[thread " + std::to_string(thread_id) + "] " + msg); + } + /** Write out the log buffer into a file */ + static inline void write(std::string filename) { + std::ofstream f(filename); + for (auto& line : lines) { + f << line << std::endl; + } + f.close(); + } +}; +#else +class PartitionTreeDebug { + public: + static inline void log(std::string /* msg */) {} + static inline void write(std::string /* filename */) {} +}; +#endif diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 881dbfd46aa..99d116b0de6 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -520,14 +520,13 @@ void mark_ends(const Netlist<>& net_list, ParentNetId net_id) { } } -void mark_remaining_ends(ParentNetId net_id, const std::vector& remaining_sinks) { - // like mark_ends, but only performs it for the remaining sinks of a net - RRNodeId inode; - +/** like mark_ends, but only performs it for the remaining sinks of a net */ +void mark_remaining_ends(ParentNetId net_id) { auto& route_ctx = g_vpr_ctx.mutable_routing(); + const auto& tree = route_ctx.route_trees[net_id].value(); - for (int sink_pin : remaining_sinks) { - inode = route_ctx.net_rr_terminals[net_id][sink_pin]; + for (int sink_pin : tree.get_remaining_isinks()) { + RRNodeId inode = route_ctx.net_rr_terminals[net_id][sink_pin]; ++route_ctx.rr_node_route_inf[inode].target_flag; } } diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 4a7d1a2cf76..68e525e10b0 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -89,7 +89,7 @@ inline float get_single_rr_cong_cost(RRNodeId inode, float 
pres_fac) { void mark_ends(const Netlist<>& net_list, ParentNetId net_id); -void mark_remaining_ends(ParentNetId net_id, const std::vector& remaining_sinks); +void mark_remaining_ends(ParentNetId net_id); void add_to_mod_list(RRNodeId inode, std::vector& modified_rr_node_inf); diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp index 1e50f758b31..96e6464f62b 100644 --- a/vpr/src/route/route_parallel.cpp +++ b/vpr/src/route/route_parallel.cpp @@ -33,7 +33,6 @@ # include "tbb/enumerable_thread_specific.h" # include "tbb/task_group.h" -# include "tbb/global_control.h" /** route_net and similar functions need many bits of state collected from various * parts of VPR, collect them here for ease of use */ @@ -102,6 +101,12 @@ static bool try_parallel_route_tmpl(const Netlist<>& netlist, ScreenUpdatePriority first_iteration_priority, bool is_flat); +template +static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx& ctx); + +template +static RouteIterResults route_without_partition_tree(std::vector& nets_to_route, RouteIterCtx& ctx); + /************************ Subroutine definitions *****************************/ bool try_parallel_route(const Netlist<>& net_list, @@ -326,12 +331,6 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, is_flat); } - /* Build partition tree for parallel routing */ - vtr::Timer t; - PartitionTree partition_tree(net_list); - float total_prep_time = t.elapsed_sec(); - VTR_LOG("# Built partition tree in %f seconds\n", total_prep_time); - tbb::task_group tbb_task_group; /* Set up thread local storage. 
@@ -414,7 +413,9 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, choking_spots, is_flat}; - RouteIterResults iter_results = route_partition_tree(tbb_task_group, partition_tree, iter_ctx); + vtr::Timer net_routing_timer; + RouteIterResults iter_results = route_with_partition_tree(tbb_task_group, iter_ctx); + PartitionTreeDebug::log("Routing all nets took " + std::to_string(net_routing_timer.elapsed_sec()) + " s"); if (!iter_results.is_routable) { return false; // Impossible to route @@ -478,6 +479,7 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, //Output progress print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); + PartitionTreeDebug::log("Iteration " + std::to_string(itry) + " took " + std::to_string(iter_elapsed_time) + " s"); prev_iter_cumm_time = iter_cumm_time; @@ -593,8 +595,7 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, */ if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { - /** TODO: Disabled BB scaling for the baseline parallel router. Should re-enable it by building/updating partition tree on every iteration */ - // num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets, net_list, router_opts.high_fanout_threshold); + num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets, net_list, router_opts.high_fanout_threshold); } if (itry >= high_effort_congestion_mode_iteration_threshold) { @@ -665,8 +666,7 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow bb_fac = std::min(max_grid_dim, bb_fac * BB_SCALE_FACTOR); - /** TODO: Disabled BB scaling for the baseline parallel router. 
Should re-enable it by building/updating partition tree on every iteration */ - // route_ctx.route_bb = load_route_bb(net_list, bb_fac); + route_ctx.route_bb = load_route_bb(net_list, bb_fac); } ++itry_conflicted_mode; @@ -795,6 +795,7 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout); VTR_LOG("\n"); + PartitionTreeDebug::write("partition_tree.log"); return routing_is_successful; } @@ -814,7 +815,6 @@ NetResultFlags try_parallel_route_net(ConnectionRouter& router, CBRR& connections_inf, RouterStats& router_stats, std::vector& pin_criticality, - std::vector>& rt_node_of_sink, NetPinsMatrix& net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info, @@ -828,14 +828,11 @@ NetResultFlags try_parallel_route_net(ConnectionRouter& router, NetResultFlags flags; - connections_inf.prepare_routing_for_net(net_id); - bool reroute_for_hold = false; if (budgeting_inf.if_set()) { reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); reroute_for_hold &= worst_negative_slack != 0; } - if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ flags.success = true; } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. 
*/ @@ -856,7 +853,6 @@ NetResultFlags try_parallel_route_net(ConnectionRouter& router, connections_inf, router_stats, pin_criticality, - rt_node_of_sink, net_delay[net_id].data(), netlist_pin_lookup, timing_info, @@ -878,6 +874,7 @@ NetResultFlags try_parallel_route_net(ConnectionRouter& router, flags.was_rerouted = true; //Flag to record whether routing was actually changed } + return flags; } @@ -895,8 +892,6 @@ void route_partition_tree_helper(tbb::task_group& g, node.is_routable = true; node.rerouted_nets.clear(); - std::cout << "routing node with " << node.nets.size() << " nets\n"; - vtr::Timer t; for (auto net_id : node.nets) { auto flags = try_parallel_route_net( @@ -909,7 +904,6 @@ void route_partition_tree_helper(tbb::task_group& g, ctx.connections_inf, ctx.router_stats.local(), ctx.route_structs.local().pin_criticality, - ctx.route_structs.local().rt_node_of_sink, ctx.net_delay, ctx.netlist_pin_lookup, ctx.timing_info, @@ -933,7 +927,8 @@ void route_partition_tree_helper(tbb::task_group& g, nets_to_retry[net_id] = true; } } - node.exec_times.push_back(t.elapsed_sec()); + + PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s"); /* add left and right trees to task queue */ if (node.left && node.right) { @@ -944,7 +939,7 @@ void route_partition_tree_helper(tbb::task_group& g, route_partition_tree_helper(g, *node.right, ctx, nets_to_retry); }); } else { - VTR_ASSERT(!node.left && !node.right); // tree should have been built perfectly balanced + VTR_ASSERT(!node.left && !node.right); // there shouldn't be a node with a single branch } } @@ -1003,4 +998,60 @@ RouteIterResults route_partition_tree(tbb::task_group& g, return out; } +/* Build a partition tree and route with it */ +template +static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx& ctx) { + vtr::Timer t2; + PartitionTree partition_tree(ctx.net_list); + float total_prep_time = t2.elapsed_sec(); 
+ VTR_LOG("# Built partition tree in %f seconds\n", total_prep_time); + + return route_partition_tree(g, partition_tree, ctx); +} + +/* Route serially */ +template +static RouteIterResults route_without_partition_tree(std::vector& nets_to_route, RouteIterCtx& ctx) { + RouteIterResults out; + + /* Sort so net with most sinks is routed first. */ + std::sort(nets_to_route.begin(), nets_to_route.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool { + return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size(); + }); + + for (auto net_id : nets_to_route) { + auto flags = try_timing_driven_route_net( + ctx.routers.local(), + ctx.net_list, + net_id, + ctx.itry, + ctx.pres_fac, + ctx.router_opts, + ctx.connections_inf, + ctx.router_stats.local(), + ctx.route_structs.local().pin_criticality, + ctx.route_structs.local().rt_node_of_sink, + ctx.net_delay, + ctx.netlist_pin_lookup, + ctx.timing_info, + ctx.pin_timing_invalidator, + ctx.budgeting_inf, + ctx.worst_negative_slack, + ctx.routing_predictor, + ctx.choking_spots[net_id], + ctx.is_flat); + + if (!flags.success) { + out.is_routable = false; + } + if (flags.was_rerouted) { + out.rerouted_nets.push_back(net_id); + } + } + + update_router_stats(out.stats, ctx.router_stats.local()); + + return out; +} + #endif // VPR_USE_TBB diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 9a2197ed3ca..62930ad2555 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -94,7 +94,6 @@ static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, const t_conn_cost_params cost_params, const t_router_opts& router_opts, RouteTree& tree, - std::vector>& rt_node_of_sink, SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, route_budgets& budgeting_inf, @@ -124,13 +123,11 @@ static void setup_routing_resources(int itry, unsigned num_sinks, int min_incremental_reroute_fanout, CBRR& connections_inf, - std::vector>& 
rt_node_of_sink, const t_router_opts& router_opts, bool ripup_high_fanout_nets); static void update_net_delays_from_route_tree(float* net_delay, const Netlist<>& net_list, - std::vector>& rt_node_of_sink, ParentNetId inet, TimingInfo* timing_info, NetPinTimingInvalidator* pin_timing_invalidator); @@ -469,7 +466,6 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, connections_inf, router_iteration_stats, route_structs.pin_criticality, - route_structs.rt_node_of_sink, net_delay, netlist_pin_lookup, route_timing_info, @@ -868,7 +864,6 @@ NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, CBRR& connections_inf, RouterStats& router_stats, std::vector& pin_criticality, - std::vector>& rt_node_of_sink, NetPinsMatrix& net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info, @@ -882,8 +877,6 @@ NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, NetResultFlags flags; - connections_inf.prepare_routing_for_net(net_id); - bool reroute_for_hold = false; if (budgeting_inf.if_set()) { reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); @@ -909,7 +902,6 @@ NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, connections_inf, router_stats, pin_criticality, - rt_node_of_sink, net_delay[net_id].data(), netlist_pin_lookup, timing_info, @@ -955,7 +947,6 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, CBRR& connections_inf, RouterStats& router_stats, std::vector& pin_criticality, - std::vector>& rt_node_of_sink, float* net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info, @@ -982,7 +973,6 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, num_sinks, router_opts.min_incremental_reroute_fanout, connections_inf, - rt_node_of_sink, router_opts, check_hold(router_opts, worst_neg_slack)); @@ -1001,7 +991,7 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, // after this point the route tree 
is correct // remaining_targets from this point on are the **pin indices** that have yet to be routed - auto& remaining_targets = connections_inf.get_remaining_targets(net_id); + std::vector remaining_targets(tree.get_remaining_isinks().begin(), tree.get_remaining_isinks().end()); // calculate criticality of remaining target pins for (int ipin : remaining_targets) { @@ -1102,7 +1092,6 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, cost_params, router_opts, tree, - rt_node_of_sink, spatial_route_tree_lookup, router_stats, budgeting_inf, @@ -1132,7 +1121,6 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed update_net_delays_from_route_tree(net_delay, net_list, - rt_node_of_sink, net_id, timing_info.get(), pin_timing_invalidator); @@ -1249,7 +1237,6 @@ static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, const t_conn_cost_params cost_params, const t_router_opts& router_opts, RouteTree& tree, - std::vector>& rt_node_of_sink, SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, route_budgets& budgeting_inf, @@ -1343,8 +1330,6 @@ static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, } } - rt_node_of_sink[target_pin] = new_sink; - /* update global occupancy from the new branch */ if (new_branch) pathfinder_update_cost_from_route_tree(new_branch.value(), 1); @@ -1364,14 +1349,11 @@ static void setup_routing_resources(int itry, unsigned num_sinks, int min_incremental_reroute_fanout, CBRR& connections_inf, - std::vector>& rt_node_of_sink, const t_router_opts& router_opts, bool ripup_high_fanout_nets) { /* Build and return a partial route tree from the legal connections from last iteration. 
* along the way do: * update pathfinder costs to be accurate to the partial route tree - * find and store the pins that still need to be reached in incremental_rerouting_resources.remaining_targets - * find and store the rt nodes that have been reached in incremental_rerouting_resources.reached_rt_sinks * mark the rr_node sinks as targets to be reached. */ auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -1390,9 +1372,8 @@ static void setup_routing_resources(int itry, /* re-initialize net */ tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree.value().root(), 1); - for (unsigned int sink_pin = 1; sink_pin <= num_sinks; ++sink_pin) - connections_inf.toreach_rr_sink(net_id, sink_pin); // since all connections will be rerouted for this net, clear all of net's forced reroute flags connections_inf.clear_force_reroute_for_net(net_id); @@ -1401,16 +1382,15 @@ static void setup_routing_resources(int itry, // of their versions that act on node indices directly like mark_remaining_ends mark_ends(net_list, net_id); } else { - auto& reached_sinks = connections_inf.get_reached_rt_sinks(net_id); - auto& remaining_targets = connections_inf.get_remaining_targets(net_id); - profiling::net_rebuild_start(); - if (!tree) + if (!tree) { tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree.value().root(), 1); + } /* copy the existing routing - * prune_route_tree depends on global occ, so we can't subtract before pruning + * prune() depends on global occ, so we can't subtract before pruning * OPT: to skip this copy, return a "diff" from RouteTree::prune */ RouteTree tree2 = tree.value(); @@ -1435,19 +1415,12 @@ static void setup_routing_resources(int itry, // Initialize only to source tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree.value().root(), 1); } - VTR_ASSERT(reached_sinks.size() + remaining_targets.size() == num_sinks); + profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size()); - // give lookup on 
the reached sinks - for (RRNodeId sink_rr_node : reached_sinks) { - auto& sink_node = tree.value().find_by_rr_id(sink_rr_node).value(); - rt_node_of_sink[sink_node.net_pin_index] = sink_node; - } - - profiling::net_rebuild_end(num_sinks, remaining_targets.size()); - - // still need to calculate the tree's time delay (0 Tarrival means from SOURCE) + // still need to calculate the tree's time delay tree.value().reload_timing(); // check for R_upstream C_downstream and edge correctness @@ -1457,7 +1430,7 @@ static void setup_routing_resources(int itry, VTR_ASSERT_SAFE(tree.value().is_uncongested()); // mark remaining ends - mark_remaining_ends(net_id, remaining_targets); + mark_remaining_ends(net_id); // mark the lookup (rr_node_route_inf) for existing tree elements as NO_PREVIOUS so add_to_path stops when it reaches one of them update_rr_route_inf_from_tree(tree.value().root()); @@ -1540,21 +1513,14 @@ bool timing_driven_check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_list, - std::vector>& rt_node_of_sink, ParentNetId inet, TimingInfo* timing_info, NetPinTimingInvalidator* pin_timing_invalidator) { - for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) { - float new_delay = rt_node_of_sink[isink]->Tdel; - - if (pin_timing_invalidator && new_delay != net_delay[isink]) { - //Delay changed, invalidate for incremental timing update - VTR_ASSERT_SAFE(timing_info); - ParentPinId pin = net_list.net_pin(inet, isink); - pin_timing_invalidator->invalidate_connection(pin, timing_info); - } + auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[inet].value(); - net_delay[isink] = new_delay; + for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) { + update_net_delay_from_isink(net_delay, tree, isink, net_list, inet, timing_info, pin_timing_invalidator); } } @@ -1571,8 +1537,10 @@ bool should_route_net(ParentNetId net_id, return true; } + const RouteTree& tree = 
route_ctx.route_trees[net_id].value(); + /* Walk over all rt_nodes in the net */ - for (auto& rt_node : route_ctx.route_trees[net_id]->all_nodes()) { + for (auto& rt_node : tree.all_nodes()) { RRNodeId inode = rt_node.inode; int occ = route_ctx.rr_node_route_inf[inode].occ(); int capacity = rr_graph.node_capacity(inode); @@ -1591,9 +1559,11 @@ bool should_route_net(ParentNetId net_id, } } - VTR_ASSERT(connections_inf.get_remaining_targets(net_id).empty()); + /* If all sinks have been routed to without overuse, no need to route this */ + if (tree.get_remaining_isinks().empty()) + return false; - return false; /* Current route has no overuse */ + return true; } bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) { @@ -1873,6 +1843,8 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net for (ParentNetId net : updated_nets) { if (!route_ctx.route_trees[net]) continue; // Skip if no routing + if (!route_ctx.net_status.is_routed(net)) + continue; //We do not adjust the bounding boxes of high fanout nets, since they //use different bounding boxes based on the target location. 
@@ -2100,7 +2072,6 @@ void prune_unused_non_configurable_nets(CBRR& connections_inf, continue; RouteTree& tree = route_ctx.route_trees[net_id].value(); - connections_inf.prepare_routing_for_net(net_id); connections_inf.clear_force_reroute_for_net(net_id); std::vector usage = tree.get_non_config_node_set_usage(); diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index bccf9ba2c84..38495bb806b 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -65,20 +65,14 @@ struct RoutingMetrics { /* Data while timing driven route is active */ class timing_driven_route_structs { public: - std::vector pin_criticality; /* [1..max_pins_per_net-1] */ - std::vector sink_order; /* [1..max_pins_per_net-1] */ - std::vector> rt_node_of_sink; /* [1..max_pins_per_net-1] */ + std::vector pin_criticality; /* [1..max_pins_per_net-1] */ timing_driven_route_structs(const Netlist<>& net_list) { int max_sinks = std::max(get_max_pins_per_net(net_list) - 1, 0); pin_criticality.resize(max_sinks + 1); - sink_order.resize(max_sinks + 1); - rt_node_of_sink.resize(max_sinks + 1); /* Set element 0 to invalid values */ pin_criticality[0] = std::numeric_limits::quiet_NaN(); - sink_order[0] = -1; - rt_node_of_sink[0] = vtr::nullopt; } }; @@ -234,7 +228,6 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, CBRR& connections_inf, RouterStats& router_stats, std::vector& pin_criticality, - std::vector>& rt_node_of_sink, float* net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info, @@ -255,7 +248,6 @@ NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, CBRR& connections_inf, RouterStats& router_stats, std::vector& pin_criticality, - std::vector>& rt_node_of_sink, NetPinsMatrix& net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info, @@ -266,6 +258,26 @@ NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, const std::vector>& choking_spots, 
bool is_flat); +/** Update net_delay value for a single sink in a RouteTree. */ +inline void update_net_delay_from_isink(float* net_delay, + const RouteTree& tree, + int isink, + const Netlist<>& net_list, + ParentNetId inet, + TimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator) { + float new_delay = tree.find_by_isink(isink)->Tdel; + + if (pin_timing_invalidator && new_delay != net_delay[isink]) { + //Delay changed, invalidate for incremental timing update + VTR_ASSERT_SAFE(timing_info); + ParentPinId pin = net_list.net_pin(inet, isink); + pin_timing_invalidator->invalidate_connection(pin, timing_info); + } + + net_delay[isink] = new_delay; +} + void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats); #ifndef NO_GRAPHICS diff --git a/vpr/src/route/route_tree.cpp b/vpr/src/route/route_tree.cpp index 82e929e3ebc..36f37461527 100644 --- a/vpr/src/route/route_tree.cpp +++ b/vpr/src/route/route_tree.cpp @@ -78,10 +78,15 @@ RouteTree::RouteTree(RRNodeId _inode) { RouteTree::RouteTree(ParentNetId _inet) { auto& route_ctx = g_vpr_ctx.routing(); + RRNodeId inode = RRNodeId(route_ctx.net_rr_terminals[_inet][0]); _root = new RouteTreeNode(inode, RRSwitchId::INVALID(), nullptr); _net_id = _inet; _rr_node_to_rt_node[inode] = _root; + + _num_sinks = route_ctx.net_rr_terminals[_inet].size() - 1; + _isink_to_rt_node.resize(_num_sinks); /* 0-indexed */ + _is_isink_reached.resize(_num_sinks + 1); /* 1-indexed */ } /** Make a copy of rhs and return it. @@ -105,43 +110,66 @@ void RouteTree::copy_tree_x(RouteTreeNode* lhs, const RouteTreeNode& rhs) { /* Copy constructor */ RouteTree::RouteTree(const RouteTree& rhs) { - _root = copy_tree(rhs._root); + _isink_to_rt_node.resize(rhs._isink_to_rt_node.size()); _net_id = rhs._net_id; + _root = copy_tree(rhs._root); + _is_isink_reached = rhs._is_isink_reached; + _num_sinks = rhs._num_sinks; } /* Move constructor: * Take over rhs' linked list & set it to null so it doesn't get freed. 
- * Refs should stay valid after this? */ + * Refs should stay valid after this? + * I don't think there's a user crazy enough to move around route trees + * from multiple threads, but better safe than sorry */ RouteTree::RouteTree(RouteTree&& rhs) { + std::unique_lock rhs_write_lock(rhs._write_mutex); _root = rhs._root; _net_id = rhs._net_id; rhs._root = nullptr; _rr_node_to_rt_node = std::move(rhs._rr_node_to_rt_node); + _isink_to_rt_node = std::move(rhs._isink_to_rt_node); + _is_isink_reached = std::move(rhs._is_isink_reached); + _num_sinks = rhs._num_sinks; } /* Copy assignment: free list, clear lookup, reload list. */ RouteTree& RouteTree::operator=(const RouteTree& rhs) { if (this == &rhs) return *this; + std::unique_lock write_lock(_write_mutex); free_list(_root); _rr_node_to_rt_node.clear(); - _root = copy_tree(rhs._root); + _isink_to_rt_node.clear(); + _isink_to_rt_node.resize(rhs._isink_to_rt_node.size()); _net_id = rhs._net_id; + _root = copy_tree(rhs._root); + _is_isink_reached = rhs._is_isink_reached; + _num_sinks = rhs._num_sinks; return *this; } /* Move assignment: * Free my list, take over rhs' linked list & set it to null so it doesn't get freed. * Also ~steal~ acquire ownership of node lookup from rhs. - * Refs should stay valid after this? */ + * Refs should stay valid after this? 
+ * I don't think there's a user crazy enough to move around route trees + * from multiple threads, but better safe than sorry */ RouteTree& RouteTree::operator=(RouteTree&& rhs) { if (this == &rhs) return *this; + /* See https://stackoverflow.com/a/29988626 */ + std::unique_lock write_lock(_write_mutex, std::defer_lock); + std::unique_lock rhs_write_lock(rhs._write_mutex, std::defer_lock); + std::lock(write_lock, rhs_write_lock); free_list(_root); _root = rhs._root; _net_id = rhs._net_id; rhs._root = nullptr; _rr_node_to_rt_node = std::move(rhs._rr_node_to_rt_node); + _isink_to_rt_node = std::move(rhs._isink_to_rt_node); + _is_isink_reached = std::move(rhs._is_isink_reached); + _num_sinks = rhs._num_sinks; return *this; } @@ -149,6 +177,11 @@ RouteTree& RouteTree::operator=(RouteTree&& rhs) { * Can take a RouteTreeNode& to do an incremental update. * Note that update_from_heap already calls this. */ void RouteTree::reload_timing(vtr::optional from_node) { + std::unique_lock write_lock(_write_mutex); + reload_timing_unlocked(from_node); +} + +void RouteTree::reload_timing_unlocked(vtr::optional from_node) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -449,8 +482,8 @@ void RouteTree::print(void) const { * RouteTreeNode of the SINK it adds to the routing. */ std::tuple, vtr::optional> RouteTree::update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat) { - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; + /* Lock the route tree for writing. 
At least on Linux this shouldn't have an impact on single-threaded code */ + std::unique_lock write_lock(_write_mutex); //Create a new subtree from the target in hptr to existing routing vtr::optional start_of_new_subtree_rt_node, sink_rt_node; @@ -460,19 +493,14 @@ RouteTree::update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRoute return {vtr::nullopt, *sink_rt_node}; /* Reload timing values */ - reload_timing(start_of_new_subtree_rt_node); + reload_timing_unlocked(start_of_new_subtree_rt_node); if (spatial_rt_lookup) { update_route_tree_spatial_lookup_recur(*start_of_new_subtree_rt_node, *spatial_rt_lookup); } - /* if the new branch is the only child of its parent and the parent is a SOURCE, - * it is the first time we are creating this tree, so include the parent in the new branch return - * so that it can be included in occupancy calculation. - * TODO: probably this should be cleaner */ - RouteTreeNode* parent = start_of_new_subtree_rt_node->_parent; - if (start_of_new_subtree_rt_node->_next_sibling == parent->_subtree_end && rr_graph.node_type(parent->inode) == SOURCE) - return {*parent, *sink_rt_node}; + if (_net_id.is_valid()) /* We don't have this lookup if the tree isn't associated with a net */ + _is_isink_reached[target_net_pin_index] = true; return {*start_of_new_subtree_rt_node, *sink_rt_node}; } @@ -527,7 +555,6 @@ RouteTree::add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is * Walk through new_branch_iswitches and corresponding new_branch_inodes. 
*/ for (int i = new_branch_inodes.size() - 1; i >= 0; i--) { RouteTreeNode* new_node = new RouteTreeNode(new_branch_inodes[i], new_branch_iswitches[i], last_node); - add_node(last_node, new_node); e_rr_type node_type = rr_graph.node_type(new_branch_inodes[i]); // If is_flat is enabled, IPINs should be added, since they are used for intra-cluster routing @@ -540,6 +567,8 @@ RouteTree::add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is new_node->re_expand = true; } + add_node(last_node, new_node); + last_node = new_node; main_branch_visited.insert(new_branch_inodes[i]); @@ -604,6 +633,8 @@ void RouteTree::add_non_configurable_nodes(RouteTreeNode* rt_node, /** Prune a route tree of illegal branches - when there is at least 1 congested node on the path to a sink * Returns nullopt if the entire tree has been pruned. + * Updates "is_isink_reached" lookup! After prune(), if a sink is marked as reached in the lookup, it is reached + * legally. * * Note: does not update R_upstream/C_downstream */ vtr::optional @@ -612,6 +643,8 @@ RouteTree::prune(CBRR& connections_inf, std::vector* non_config_node_set_us const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); + std::unique_lock write_lock(_write_mutex); + VTR_ASSERT_MSG(rr_graph.node_type(root().inode) == SOURCE, "Root of route tree must be SOURCE"); VTR_ASSERT_MSG(_net_id, "RouteTree must be constructed using a ParentNetId"); @@ -675,14 +708,12 @@ RouteTree::prune_x(RouteTreeNode& rt_node, CBRR& connections_inf, bool force_pru if (!force_prune) { //Valid path to sink - //Record sink as reachable - connections_inf.reached_rt_sink(_net_id, rt_node.inode); - + //Record sink as reached + _is_isink_reached[rt_node.net_pin_index] = true; return rt_node; // Not pruned } else { //Record as not reached - connections_inf.toreach_rr_sink(_net_id, rt_node.net_pin_index); - + _is_isink_reached[rt_node.net_pin_index] = false; return vtr::nullopt; // Pruned } } else if (all_children_pruned) { 
@@ -789,6 +820,7 @@ RouteTree::prune_x(RouteTreeNode& rt_node, CBRR& connections_inf, bool force_pru * This is used after routing a clock net. * TODO: is this function doing anything? Try running without it */ void RouteTree::freeze(void) { + std::unique_lock write_lock(_write_mutex); return freeze_x(*_root); } diff --git a/vpr/src/route/route_tree.h b/vpr/src/route/route_tree.h index 9d2200d2696..63eebf555ea 100644 --- a/vpr/src/route/route_tree.h +++ b/vpr/src/route/route_tree.h @@ -83,6 +83,7 @@ #include #include #include +#include #include #include "connection_based_routing_fwd.h" @@ -90,6 +91,8 @@ #include "vtr_assert.h" #include "spatial_route_tree_lookup.h" #include "vtr_optional.h" +#include "vtr_range.h" +#include "vtr_vec_id_set.h" /** * @brief A single route tree node @@ -341,6 +344,7 @@ class RouteTree { RouteTree(ParentNetId inet); ~RouteTree() { + std::unique_lock write_lock(_write_mutex); free_list(_root); } @@ -349,19 +353,34 @@ class RouteTree { * is the heap pointer of the SINK that was reached, and target_net_pin_index * is the net pin index corresponding to the SINK that was reached. This routine * returns a tuple: RouteTreeNode of the branch it adds to the route tree and - * RouteTreeNode of the SINK it adds to the routing. */ + * RouteTreeNode of the SINK it adds to the routing. + * Locking operation: only one thread can update_from_heap() a RouteTree at a time. */ std::tuple, vtr::optional> update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat); /** Reload timing values (R_upstream, C_downstream, Tdel). * Can take a RouteTreeNode& to do an incremental update. - * Note that update_from_heap already does this, but prune() doesn't */ + * Note that update_from_heap already does this, but prune() doesn't. + * Locking operation: only one thread can reload_timing() for a RouteTree at a time. 
*/ void reload_timing(vtr::optional from_node = vtr::nullopt); /** Get the RouteTreeNode corresponding to the RRNodeId. Returns nullopt if not found. - * SINK nodes may be added to the tree multiple times. In that case, this will return the last one added. */ + * SINK nodes may be added to the tree multiple times. In that case, this will return the last one added. + * Use find_by_isink for a more accurate lookup. */ vtr::optional find_by_rr_id(RRNodeId rr_node) const; + /** Get the sink RouteTreeNode associated with the isink. + * Will probably segfault if the tree is not constructed with a ParentNetId. */ + inline vtr::optional find_by_isink(int isink) const { + RouteTreeNode* x = _isink_to_rt_node[isink - 1]; + return x ? vtr::optional(*x) : vtr::nullopt; + } + + /** Get the number of sinks in associated net. */ + constexpr size_t num_sinks(void) const { + return _num_sinks; + } + /** Check the consistency of this route tree. Looks for: * - invalid parent-child links * - invalid timing values @@ -378,12 +397,14 @@ class RouteTree { /** Prune overused nodes from the tree. * Also prune unused non-configurable nodes if non_config_node_set_usage is provided (see get_non_config_node_set_usage) - * Returns nullopt if the entire tree is pruned. */ + * Returns nullopt if the entire tree is pruned. + * Locking operation: only one thread can prune() a RouteTree at a time. */ vtr::optional prune(CBRR& connections_inf, std::vector* non_config_node_set_usage = nullptr); /** Remove all sinks and mark the remaining nodes as un-expandable. * This is used after routing a clock net. - * TODO: is this function doing anything? Try running without it */ + * TODO: is this function doing anything? Try running without it + * Locking operation: only one thread can freeze() a RouteTree at a time. */ void freeze(void); /** Count configurable edges to non-configurable node sets. 
(rr_nonconf_node_sets index -> int) @@ -400,6 +421,71 @@ class RouteTree { /** Get a reference to the root RouteTreeNode. */ constexpr const RouteTreeNode& root(void) const { return *_root; } /* this file is 90% const and 10% code */ + /** Iterator implementation for remaining or reached isinks. Goes over [1..num_sinks] + * and only returns a value when the sink state is right */ + template + class IsinkIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = int; + using pointer = int*; + using reference = int&; + + constexpr IsinkIterator(const std::vector& bitset, size_t x) + : _bitset(bitset) + , _x(x) { + if (_x < _bitset.size() && _bitset[_x] != sink_state) /* Iterate forward to a valid state */ + ++(*this); + } + constexpr value_type operator*() const { + return _x; + } + inline IsinkIterator& operator++() { + _x++; + for (; _x < _bitset.size() && _bitset[_x] != sink_state; _x++) + ; + return *this; + } + inline IsinkIterator operator++(int) { + IsinkIterator tmp = *this; + ++(*this); + return tmp; + } + constexpr bool operator==(const IsinkIterator& rhs) { return _x == rhs._x; } + constexpr bool operator!=(const IsinkIterator& rhs) { return _x != rhs._x; } + + private: + /** Ref to the bitset */ + const std::vector& _bitset; + /** Current position */ + size_t _x; + }; + + typedef vtr::Range> reached_isink_range; + typedef vtr::Range> remaining_isink_range; + + /** Get a lookup which contains the "isink reached state". + * It's a 1-indexed! bitset of "pin indices". True if the nth sink has been reached, false otherwise. + * If you call it before prune() and after routing, there's no guarantee on whether the reached sinks + * are reached legally. */ + constexpr const std::vector& get_is_isink_reached(void) const { return _is_isink_reached; } + + /** Get reached isinks: 1-indexed pin indices enumerating the sinks in this net. 
+ * "Reached" means "reached legally" if you call this after prune() and not before any routing. + * Otherwise it doesn't guarantee legality. + * Builds and returns a value: use get_is_isink_reached directly if you want speed. */ + constexpr reached_isink_range get_reached_isinks(void) const { + return vtr::make_range(IsinkIterator(_is_isink_reached, 1), IsinkIterator(_is_isink_reached, _num_sinks + 1)); + } + + /** Get remaining (not routed (legally?)) isinks: + * 1-indexed pin indices enumerating the sinks in this net. + * Caveats in get_reached_isinks() apply. */ + constexpr remaining_isink_range get_remaining_isinks(void) const { + return vtr::make_range(IsinkIterator(_is_isink_reached, 1), IsinkIterator(_is_isink_reached, _num_sinks + 1)); + } + private: std::tuple, vtr::optional> add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is_flat); @@ -409,6 +495,7 @@ class RouteTree { std::unordered_set& visited, bool is_flat); + void reload_timing_unlocked(vtr::optional from_node = vtr::nullopt); void load_new_subtree_R_upstream(RouteTreeNode& from_node); float load_new_subtree_C_downstream(RouteTreeNode& from_node); RouteTreeNode& update_unbuffered_ancestors_C_downstream(RouteTreeNode& from_node); @@ -442,7 +529,12 @@ class RouteTree { node->_next_sibling = parent->_next; } parent->_next = node; + + /** Add node to RR to RT lookup */ _rr_node_to_rt_node[node->inode] = node; + /** If node is a SINK (net_pin_index > 0), also add it to sink RT lookup */ + if (node->net_pin_index > 0 && _net_id.is_valid()) + _isink_to_rt_node[node->net_pin_index - 1] = node; /* Now it's a branch */ parent->_is_leaf = false; @@ -526,4 +618,23 @@ class RouteTree { * therefore store the last rt_node created of all the SINK nodes with the same * index "inode". */ std::unordered_map _rr_node_to_rt_node; + + /** RRNodeId is not a unique lookup for sink RouteTreeNodes, but net_pin_index + * is. 
Store a 0-indexed lookup here for users who need to look up a sink from + * a net_pin_index, ipin, isink, etc. */ + std::vector _isink_to_rt_node; + + /** Is Nth sink in this net reached? + * Bitset of [1..num_sinks]. (1-indexed!) + * We work with these indices, because they are used in a bunch of lookups in + * the router. Looking these back up from sink RR nodes would require looking + * up its RouteTreeNode and then the net_pin_index from that. */ + std::vector _is_isink_reached; + + /** Number of sinks in this tree's net. Useful for iteration. */ + size_t _num_sinks; + + /** Write mutex on this RouteTree. Acquired by the write operations automatically: + * the caller does not need to know about a lock. */ + std::mutex _write_mutex; }; diff --git a/vpr/src/route/route_util.cpp b/vpr/src/route/route_util.cpp index 96e9551fbe8..8ca7f657124 100644 --- a/vpr/src/route/route_util.cpp +++ b/vpr/src/route/route_util.cpp @@ -1,7 +1,9 @@ #include "route_util.h" #include "globals.h" +#include "draw_types.h" +#include "draw_global.h" -vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat) { +vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat, bool is_print) { VTR_ASSERT(rr_type == CHANX || rr_type == CHANY); auto& device_ctx = g_vpr_ctx.device(); @@ -27,6 +29,15 @@ vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat) { //Record number of used resources in each x/y channel for (RRNodeId rr_node : rr_nodes) { +#ifndef NO_GRAPHICS + if (!is_print) { + t_draw_state* draw_state = get_draw_state_vars(); + int layer_num = rr_graph.node_layer(rr_node); + if (!draw_state->draw_layer_display[layer_num].visible) + continue; // don't count usage if layer is not visible + } +#endif + if (rr_type == CHANX) { VTR_ASSERT(rr_graph.node_type(rr_node) == CHANX); VTR_ASSERT(rr_graph.node_ylow(rr_node) == rr_graph.node_yhigh(rr_node)); diff --git a/vpr/src/route/route_util.h b/vpr/src/route/route_util.h index 21147db9544..ba8d6995738 100644 --- 
a/vpr/src/route/route_util.h +++ b/vpr/src/route/route_util.h @@ -1,9 +1,21 @@ #ifndef VPR_ROUTE_UTIL_H #define VPR_ROUTE_UTIL_H #include "vpr_types.h" +#include "draw_types.h" +#include "draw_global.h" vtr::Matrix calculate_routing_avail(t_rr_type rr_type); -vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat); + +/** + * @brief: Calculates and returns the usage over the entire grid for the specified + * type of rr_node to the usage array. The usage is recorded at each (x,y) location. + * + * @param rr_type: Type of rr_node that we are calculating the usage of; can be CHANX or CHANY + * @param is_flat: Is the flat router being used or not? + * @param only_visible: If true, only record the usage of rr_nodes on layers that are visible according to the current + * drawing settings. + */ +vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat, bool is_print); float routing_util(float used, float avail); #endif diff --git a/vtr_flow/arch/noc/mesh_noc_topology/coffe_7nm_NoC_mesh_topology.xml b/vtr_flow/arch/noc/mesh_noc_topology/coffe_7nm_NoC_mesh_topology.xml new file mode 100644 index 00000000000..89b23828938 --- /dev/null +++ b/vtr_flow/arch/noc/mesh_noc_topology/coffe_7nm_NoC_mesh_topology.xml @@ -0,0 +1,1771 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.outpad io.clock io.inpad + io.outpad io.clock io.inpad + io.outpad io.clock io.inpad + io.outpad io.clock io.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + clb.I1 clb.I3 clb.clk clb.O1 clb.O2 clb.O3 clb.O4 clb.O5 + clb.I2 clb.I4 clb.clk clb.O6 clb.O7 clb.O8 clb.O9 clb.O10 + + + + + + + + + + + + + + + + + + + + + + + + + + + dsp_top.dsp_I1 dsp_top.reset dsp_top.result[73:37] + dsp_top.dsp_I2 dsp_top.clk 
dsp_top.result[36:0] + dsp_top.chainin dsp_top.scanin + dsp_top.chainout dsp_top.scanout + + + + + + + + + + + + + + + + + + + + memory.addr1[0] memory.addr1[8] memory.addr2[5] memory.data[2] memory.data[10] memory.data[18] memory.data[26] memory.data[34] memory.clk memory.addr1[1] memory.addr1[9] memory.addr2[6] memory.data[3] memory.data[11] memory.data[19] memory.data[27] memory.data[35] memory.out[0] memory.out[8] memory.out[16] memory.out[24] memory.out[32] memory.out[1] memory.out[9] memory.out[17] memory.out[25] memory.out[33] + memory.addr1[2] memory.addr1[10] memory.addr2[7] memory.data[4] memory.data[12] memory.data[20] memory.data[28] memory.data[36] memory.addr1[3] memory.addr2[0] memory.addr2[8] memory.data[5] memory.data[13] memory.data[21] memory.data[29] memory.data[37] memory.out[2] memory.out[10] memory.out[18] memory.out[26] memory.out[34] memory.out[3] memory.out[11] memory.out[19] memory.out[27] memory.out[35] + memory.addr1[4] memory.addr2[1] memory.addr2[9] memory.data[6] memory.data[14] memory.data[22] memory.data[30] memory.data[38] memory.addr1[5] memory.addr2[2] memory.addr2[10] memory.data[7] memory.data[15] memory.data[23] memory.data[31] memory.data[39] memory.out[4] memory.out[12] memory.out[20] memory.out[28] memory.out[36] memory.out[5] memory.out[13] memory.out[21] memory.out[29] memory.out[37] + memory.addr1[6] memory.addr2[3] memory.data[0] memory.data[8] memory.data[16] memory.data[24] memory.data[32] memory.we1 memory.addr1[7] memory.addr2[4] memory.data[1] memory.data[9] memory.data[17] memory.data[25] memory.data[33] memory.we2 memory.out[6] memory.out[14] memory.out[22] memory.out[30] memory.out[38] memory.out[7] memory.out[15] memory.out[23] memory.out[31] memory.out[39] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 + + 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 0 + 0 + 0 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 0 + 0 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp1_complete_engine.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp1_complete_engine.v new file mode 100644 index 00000000000..f46bd8f14e7 --- /dev/null +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp1_complete_engine.v @@ -0,0 +1,2235 @@ +/* + Top level module to connect a number of MVM modules together using a + NoC. The design implements the following: + 1) A number of dispatcher modules used to generate initial traffic + 2) A number of MVM modules that perform matrix vector multiplication + on input data streams + 3) A collector module to process the results of the MVM modules + 4) A numbe of NoC routers that are used to connect all the modules + added above. The NoC routers are responsible for transferring data + from dispatchers to MVM modules, between MVM modules and from MVM + modules to the collector module. 
+*/ + +/* This file contains the whole MLP engine (including all submodule definitions) so that it can go through Odin-II for synthesis without any changes */ + +module mlp_1 ( + clk, + reset, + rx_top_tvalid, + rx_top_tdata, + rx_top_tstrb, + rx_top_tkeep, + rx_top_tid, + rx_top_tdest, + rx_top_tuser, + rx_top_tlast, + dispatcher0_ififo_wdata, // external I/O input for dispatcher + dispatcher0_ififo_wen, // external I/O input for dispatcher + dispatcher1_ififo_wdata, // external I/O input for dispatcher + dispatcher1_ififo_wen, // external I/O input for dispatcher + dispatcher2_ififo_wdata, // external I/O input for dispatcher + dispatcher2_ififo_wen, // external I/O input for dispatcher + dispatcher3_ififo_wdata, // external I/O input for dispatcher + dispatcher3_ififo_wen, // external I/O input for dispatcher + collector_ofifo_rdata, // external I/O output for collector + collector_ofifo_ren, // external I/O input for collector + dispatcher0_ififo_rdy, // external I/O output for dispatcher + dispatcher1_ififo_rdy, // external I/O output for dispatcher + dispatcher2_ififo_rdy, // external I/O output for dispatcher + dispatcher3_ififo_rdy, // external I/O output for dispatcher + collector_ofifo_rdy, // external I/O output for collector + tx_top_tvalid, + tx_top_tdata, + master_tdata_top_out, + tx_top_tstrb, + tx_top_tkeep, + tx_top_tid, + tx_top_tdest, + tx_top_tuser, + tx_top_tlast +); + +parameter noc_dw = 512; //NoC Data Width +parameter byte_dw = 8; + +/*****************INPUT/OUTPUT Definition********************/ +input wire clk; +input wire reset; + +input wire [noc_dw/byte_dw - 1:0] dispatcher0_ififo_wdata; +input wire dispatcher0_ififo_wen; +input wire [noc_dw/byte_dw - 1:0] dispatcher1_ififo_wdata; +input wire dispatcher1_ififo_wen; +input wire [noc_dw/byte_dw - 1:0] dispatcher2_ififo_wdata; +input wire dispatcher2_ififo_wen; +input wire [noc_dw/byte_dw - 1:0] dispatcher3_ififo_wdata; +input wire dispatcher3_ififo_wen; + +input wire collector_ofifo_ren; + +input wire 
rx_top_tvalid; +input wire [511:0] rx_top_tdata; +input wire [ 63:0] rx_top_tstrb; +input wire [ 63:0] rx_top_tkeep; +input wire [ 7:0] rx_top_tid; +input wire [ 7:0] rx_top_tdest; +input wire [ 31:0] rx_top_tuser; +input wire rx_top_tlast; + +output wire tx_top_tvalid; +output wire [511:0] tx_top_tdata; +output wire [ 63:0] tx_top_tstrb; +output wire [ 63:0] tx_top_tkeep; +output wire [ 7:0] tx_top_tid; +output wire [ 7:0] tx_top_tdest; +output wire [ 31:0] tx_top_tuser; +output wire tx_top_tlast; +output wire [noc_dw - 1: 0] master_tdata_top_out; + + +output wire [noc_dw/byte_dw - 1:0] collector_ofifo_rdata; +output wire collector_ofifo_rdy; + +output wire dispatcher0_ififo_rdy; +output wire dispatcher1_ififo_rdy; +output wire dispatcher2_ififo_rdy; +output wire dispatcher3_ififo_rdy; + +/*******************Internal Variables**********************/ +// dispatcher signals +// dispatcher0 +wire dispatcher0_tx_valid; +wire [noc_dw - 1 : 0] dispatcher0_tx_tdata; +wire [byte_dw - 1 : 0] dispatcher0_tx_tstrb; +wire [byte_dw - 1 : 0] dispatcher0_tx_tkeep; +wire [7 : 0] dispatcher0_tx_tid; +wire [7 : 0] dispatcher0_tx_tdest; +wire [31 : 0] dispatcher0_tx_tuser; +wire dispatcher0_tx_tlast; + +wire dispatcher0_connector_tx_tready; + +// dispatcher1 +wire dispatcher1_tx_valid; +wire [noc_dw - 1 : 0] dispatcher1_tx_tdata; +wire [byte_dw - 1 : 0] dispatcher1_tx_tstrb; +wire [byte_dw - 1 : 0] dispatcher1_tx_tkeep; +wire [7 : 0] dispatcher1_tx_tid; +wire [7 : 0] dispatcher1_tx_tdest; +wire [31 : 0] dispatcher1_tx_tuser; +wire dispatcher1_tx_tlast; + +wire dispatcher1_connector_tx_tready; + +// dispatcher2 +wire dispatcher2_tx_valid; +wire [noc_dw - 1 : 0] dispatcher2_tx_tdata; +wire [byte_dw - 1 : 0] dispatcher2_tx_tstrb; +wire [byte_dw - 1 : 0] dispatcher2_tx_tkeep; +wire [7 : 0] dispatcher2_tx_tid; +wire [7 : 0] dispatcher2_tx_tdest; +wire [31 : 0] dispatcher2_tx_tuser; +wire dispatcher2_tx_tlast; + +wire dispatcher2_connector_tx_tready; + +// dispatcher3 +wire 
dispatcher3_tx_valid; +wire [noc_dw - 1 : 0] dispatcher3_tx_tdata; +wire [byte_dw - 1 : 0] dispatcher3_tx_tstrb; +wire [byte_dw - 1 : 0] dispatcher3_tx_tkeep; +wire [7 : 0] dispatcher3_tx_tid; +wire [7 : 0] dispatcher3_tx_tdest; +wire [31 : 0] dispatcher3_tx_tuser; +wire dispatcher3_tx_tlast; + +wire dispatcher3_connector_tx_tready; + +// collector signals +wire collector_rx_valid; +wire [noc_dw - 1 : 0] collector_rx_tdata; +wire [byte_dw - 1 : 0] collector_rx_tstrb; +wire [byte_dw - 1 : 0] collector_rx_tkeep; +wire [7 : 0] collector_rx_tid; +wire [7 : 0] collector_rx_tdest; +wire [31 : 0] collector_rx_tuser; +wire collector_rx_tlast; + +wire collector_connector_rx_tready; + +// mvm module signals +// layer0_mvm0 +wire layer0_mvm0_tx_valid; +wire [noc_dw - 1 : 0] layer0_mvm0_tx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm0_tx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm0_tx_tkeep; +wire [7 : 0] layer0_mvm0_tx_tid; +wire [7 : 0] layer0_mvm0_tx_tdest; +wire [31 : 0] layer0_mvm0_tx_tuser; +wire layer0_mvm0_tx_tlast; + +wire layer0_mvm0_connector_tx_tready; + +wire layer0_mvm0_rx_valid; +wire [noc_dw - 1 : 0] layer0_mvm0_rx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm0_rx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm0_rx_tkeep; +wire [7 : 0] layer0_mvm0_rx_tid; +wire [7 : 0] layer0_mvm0_rx_tdest; +wire [31 : 0] layer0_mvm0_rx_tuser; +wire layer0_mvm0_rx_tlast; + +wire layer0_mvm0_connector_rx_tready; +// layer0_mvm1 +wire layer0_mvm1_tx_valid; +wire [noc_dw - 1 : 0] layer0_mvm1_tx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm1_tx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm1_tx_tkeep; +wire [7 : 0] layer0_mvm1_tx_tid; +wire [7 : 0] layer0_mvm1_tx_tdest; +wire [31 : 0] layer0_mvm1_tx_tuser; +wire layer0_mvm1_tx_tlast; + +wire layer0_mvm1_connector_tx_tready; + +wire layer0_mvm1_rx_valid; +wire [noc_dw - 1 : 0] layer0_mvm1_rx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm1_rx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm1_rx_tkeep; +wire [7 : 0] layer0_mvm1_rx_tid; +wire [7 : 0] 
layer0_mvm1_rx_tdest; +wire [31 : 0] layer0_mvm1_rx_tuser; +wire layer0_mvm1_rx_tlast; + +wire layer0_mvm1_connector_rx_tready; +// layer0_mvm2 +wire layer0_mvm2_tx_valid; +wire [noc_dw - 1 : 0] layer0_mvm2_tx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm2_tx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm2_tx_tkeep; +wire [7 : 0] layer0_mvm2_tx_tid; +wire [7 : 0] layer0_mvm2_tx_tdest; +wire [31 : 0] layer0_mvm2_tx_tuser; +wire layer0_mvm2_tx_tlast; + +wire layer0_mvm2_connector_tx_tready; + +wire layer0_mvm2_rx_valid; +wire [noc_dw - 1 : 0] layer0_mvm2_rx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm2_rx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm2_rx_tkeep; +wire [7 : 0] layer0_mvm2_rx_tid; +wire [7 : 0] layer0_mvm2_rx_tdest; +wire [31 : 0] layer0_mvm2_rx_tuser; +wire layer0_mvm2_rx_tlast; + +wire layer0_mvm2_connector_rx_tready; +// layer0_mvm3 +wire layer0_mvm3_tx_valid; +wire [noc_dw - 1 : 0] layer0_mvm3_tx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm3_tx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm3_tx_tkeep; +wire [7 : 0] layer0_mvm3_tx_tid; +wire [7 : 0] layer0_mvm3_tx_tdest; +wire [31 : 0] layer0_mvm3_tx_tuser; +wire layer0_mvm3_tx_tlast; + +wire layer0_mvm3_connector_tx_tready; + +wire layer0_mvm3_rx_valid; +wire [noc_dw - 1 : 0] layer0_mvm3_rx_tdata; +wire [byte_dw - 1 : 0] layer0_mvm3_rx_tstrb; +wire [byte_dw - 1 : 0] layer0_mvm3_rx_tkeep; +wire [7 : 0] layer0_mvm3_rx_tid; +wire [7 : 0] layer0_mvm3_rx_tdest; +wire [31 : 0] layer0_mvm3_rx_tuser; +wire layer0_mvm3_rx_tlast; + +wire layer0_mvm3_connector_rx_tready; +// layer1_mvm0 +wire layer1_mvm0_tx_valid; +wire [noc_dw - 1 : 0] layer1_mvm0_tx_tdata; +wire [byte_dw - 1 : 0] layer1_mvm0_tx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm0_tx_tkeep; +wire [7 : 0] layer1_mvm0_tx_tid; +wire [7 : 0] layer1_mvm0_tx_tdest; +wire [31 : 0] layer1_mvm0_tx_tuser; +wire layer1_mvm0_tx_tlast; + +wire layer1_mvm0_connector_tx_tready; + +wire layer1_mvm0_rx_valid; +wire [noc_dw - 1 : 0] layer1_mvm0_rx_tdata; +wire [byte_dw - 1 : 0] 
layer1_mvm0_rx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm0_rx_tkeep; +wire [7 : 0] layer1_mvm0_rx_tid; +wire [7 : 0] layer1_mvm0_rx_tdest; +wire [31 : 0] layer1_mvm0_rx_tuser; +wire layer1_mvm0_rx_tlast; + +wire layer1_mvm0_connector_rx_tready; +// layer1_mvm1 +wire layer1_mvm1_tx_valid; +wire [noc_dw - 1 : 0] layer1_mvm1_tx_tdata; +wire [byte_dw - 1 : 0] layer1_mvm1_tx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm1_tx_tkeep; +wire [7 : 0] layer1_mvm1_tx_tid; +wire [7 : 0] layer1_mvm1_tx_tdest; +wire [31 : 0] layer1_mvm1_tx_tuser; +wire layer1_mvm1_tx_tlast; + +wire layer1_mvm1_connector_tx_tready; + +wire layer1_mvm1_rx_valid; +wire [noc_dw - 1 : 0] layer1_mvm1_rx_tdata; +wire [byte_dw - 1 : 0] layer1_mvm1_rx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm1_rx_tkeep; +wire [7 : 0] layer1_mvm1_rx_tid; +wire [7 : 0] layer1_mvm1_rx_tdest; +wire [31 : 0] layer1_mvm1_rx_tuser; +wire layer1_mvm1_rx_tlast; + +wire layer1_mvm1_connector_rx_tready; +// layer1_mvm2 +wire layer1_mvm2_tx_valid; +wire [noc_dw - 1 : 0] layer1_mvm2_tx_tdata; +wire [byte_dw - 1 : 0] layer1_mvm2_tx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm2_tx_tkeep; +wire [7 : 0] layer1_mvm2_tx_tid; +wire [7 : 0] layer1_mvm2_tx_tdest; +wire [31 : 0] layer1_mvm2_tx_tuser; +wire layer1_mvm2_tx_tlast; + +wire layer1_mvm2_connector_tx_tready; + +wire layer1_mvm2_rx_valid; +wire [noc_dw - 1 : 0] layer1_mvm2_rx_tdata; +wire [byte_dw - 1 : 0] layer1_mvm2_rx_tstrb; +wire [byte_dw - 1 : 0] layer1_mvm2_rx_tkeep; +wire [7 : 0] layer1_mvm2_rx_tid; +wire [7 : 0] layer1_mvm2_rx_tdest; +wire [31 : 0] layer1_mvm2_rx_tuser; +wire layer1_mvm2_rx_tlast; + +wire layer1_mvm2_connector_rx_tready; + + +// layer2_mvm0 +wire layer2_mvm0_tx_valid; +wire [noc_dw - 1 : 0] layer2_mvm0_tx_tdata; +wire [byte_dw - 1 : 0] layer2_mvm0_tx_tstrb; +wire [byte_dw - 1 : 0] layer2_mvm0_tx_tkeep; +wire [7 : 0] layer2_mvm0_tx_tid; +wire [7 : 0] layer2_mvm0_tx_tdest; +wire [31 : 0] layer2_mvm0_tx_tuser; +wire layer2_mvm0_tx_tlast; + +wire 
layer2_mvm0_connector_tx_tready; + +wire layer2_mvm0_rx_valid; +wire [noc_dw - 1 : 0] layer2_mvm0_rx_tdata; +wire [byte_dw - 1 : 0] layer2_mvm0_rx_tstrb; +wire [byte_dw - 1 : 0] layer2_mvm0_rx_tkeep; +wire [7 : 0] layer2_mvm0_rx_tid; +wire [7 : 0] layer2_mvm0_rx_tdest; +wire [31 : 0] layer2_mvm0_rx_tuser; +wire layer2_mvm0_rx_tlast; + +wire layer2_mvm0_connector_rx_tready; +// layer2_mvm1 +wire layer2_mvm1_tx_valid; +wire [noc_dw - 1 : 0] layer2_mvm1_tx_tdata; +wire [byte_dw - 1 : 0] layer2_mvm1_tx_tstrb; +wire [byte_dw - 1 : 0] layer2_mvm1_tx_tkeep; +wire [7 : 0] layer2_mvm1_tx_tid; +wire [7 : 0] layer2_mvm1_tx_tdest; +wire [31 : 0] layer2_mvm1_tx_tuser; +wire layer2_mvm1_tx_tlast; + +wire layer2_mvm1_connector_tx_tready; + +wire layer2_mvm1_rx_valid; +wire [noc_dw - 1 : 0] layer2_mvm1_rx_tdata; +wire [byte_dw - 1 : 0] layer2_mvm1_rx_tstrb; +wire [byte_dw - 1 : 0] layer2_mvm1_rx_tkeep; +wire [7 : 0] layer2_mvm1_rx_tid; +wire [7 : 0] layer2_mvm1_rx_tdest; +wire [31 : 0] layer2_mvm1_rx_tuser; +wire layer2_mvm1_rx_tlast; + +wire layer2_mvm1_connector_rx_tready; +// layer3_mvm0 +wire layer3_mvm0_tx_valid; +wire [noc_dw - 1 : 0] layer3_mvm0_tx_tdata; +wire [byte_dw - 1 : 0] layer3_mvm0_tx_tstrb; +wire [byte_dw - 1 : 0] layer3_mvm0_tx_tkeep; +wire [7 : 0] layer3_mvm0_tx_tid; +wire [7 : 0] layer3_mvm0_tx_tdest; +wire [31 : 0] layer3_mvm0_tx_tuser; +wire layer3_mvm0_tx_tlast; + +wire layer3_mvm0_connector_tx_tready; + +wire layer3_mvm0_rx_valid; +wire [noc_dw - 1 : 0] layer3_mvm0_rx_tdata; +wire [byte_dw - 1 : 0] layer3_mvm0_rx_tstrb; +wire [byte_dw - 1 : 0] layer3_mvm0_rx_tkeep; +wire [7 : 0] layer3_mvm0_rx_tid; +wire [7 : 0] layer3_mvm0_rx_tdest; +wire [31 : 0] layer3_mvm0_rx_tuser; +wire layer3_mvm0_rx_tlast; + +wire layer3_mvm0_connector_rx_tready; +// layer3_mvm1 +wire layer3_mvm1_tx_valid; +wire [noc_dw - 1 : 0] layer3_mvm1_tx_tdata; +wire [byte_dw - 1 : 0] layer3_mvm1_tx_tstrb; +wire [byte_dw - 1 : 0] layer3_mvm1_tx_tkeep; +wire [7 : 0] layer3_mvm1_tx_tid; +wire [7 
: 0] layer3_mvm1_tx_tdest;
+wire [31 : 0] layer3_mvm1_tx_tuser;
+wire layer3_mvm1_tx_tlast;
+
+wire layer3_mvm1_connector_tx_tready;
+
+wire layer3_mvm1_rx_valid;
+wire [noc_dw - 1 : 0] layer3_mvm1_rx_tdata;
+wire [byte_dw - 1 : 0] layer3_mvm1_rx_tstrb;
+wire [byte_dw - 1 : 0] layer3_mvm1_rx_tkeep;
+wire [7 : 0] layer3_mvm1_rx_tid;
+wire [7 : 0] layer3_mvm1_rx_tdest;
+wire [31 : 0] layer3_mvm1_rx_tuser;
+wire layer3_mvm1_rx_tlast;
+
+wire layer3_mvm1_connector_rx_tready;
+
+
+/*******************module instantiation********************/
+
+// dispatcher modules: dispatcherN takes the external ififo_* pins and drives the dispatcherN_tx_* wires consumed by noc_router_input_dispatcherN
+dispatcher dispatcher0(
+    .clk(clk),
+    .rst(reset),
+    .tx_tvalid(dispatcher0_tx_valid),
+    .tx_tdata(dispatcher0_tx_tdata),
+    .tx_tstrb(dispatcher0_tx_tstrb),
+    .tx_tkeep(dispatcher0_tx_tkeep),
+    .tx_tid(dispatcher0_tx_tid),
+    .tx_tdest(dispatcher0_tx_tdest),
+    .tx_tuser(dispatcher0_tx_tuser),
+    .tx_tlast(dispatcher0_tx_tlast),
+    .tx_tready(dispatcher0_connector_tx_tready),
+    .ififo_wdata(dispatcher0_ififo_wdata),
+    .ififo_wen(dispatcher0_ififo_wen),
+    .ififo_rdy(dispatcher0_ififo_rdy)
+);
+dispatcher dispatcher1(
+    .clk(clk),
+    .rst(reset),
+    .tx_tvalid(dispatcher1_tx_valid),
+    .tx_tdata(dispatcher1_tx_tdata),
+    .tx_tstrb(dispatcher1_tx_tstrb),
+    .tx_tkeep(dispatcher1_tx_tkeep),
+    .tx_tid(dispatcher1_tx_tid),
+    .tx_tdest(dispatcher1_tx_tdest),
+    .tx_tuser(dispatcher1_tx_tuser),
+    .tx_tlast(dispatcher1_tx_tlast),
+    .tx_tready(dispatcher1_connector_tx_tready),
+    .ififo_wdata(dispatcher1_ififo_wdata),
+    .ififo_wen(dispatcher1_ififo_wen),
+    .ififo_rdy(dispatcher1_ififo_rdy)
+);
+dispatcher dispatcher2(
+    .clk(clk),
+    .rst(reset),
+    .tx_tvalid(dispatcher2_tx_valid),
+    .tx_tdata(dispatcher2_tx_tdata),
+    .tx_tstrb(dispatcher2_tx_tstrb),
+    .tx_tkeep(dispatcher2_tx_tkeep),
+    .tx_tid(dispatcher2_tx_tid),
+    .tx_tdest(dispatcher2_tx_tdest),
+    .tx_tuser(dispatcher2_tx_tuser),
+    .tx_tlast(dispatcher2_tx_tlast), // was misspelled "dispatche2_tx_tlast": that created an implicit net and left the declared dispatcher2_tx_tlast wire undriven
+    .tx_tready(dispatcher2_connector_tx_tready),
+    .ififo_wdata(dispatcher2_ififo_wdata),
+    .ififo_wen(dispatcher2_ififo_wen),
+    .ififo_rdy(dispatcher2_ififo_rdy)
+);
+dispatcher dispatcher3(
+    .clk(clk),
+    .rst(reset),
+    .tx_tvalid(dispatcher3_tx_valid),
+    .tx_tdata(dispatcher3_tx_tdata),
+    .tx_tstrb(dispatcher3_tx_tstrb),
+    .tx_tkeep(dispatcher3_tx_tkeep),
+    .tx_tid(dispatcher3_tx_tid),
+    .tx_tdest(dispatcher3_tx_tdest),
+    .tx_tuser(dispatcher3_tx_tuser),
+    .tx_tlast(dispatcher3_tx_tlast),
+    .tx_tready(dispatcher3_connector_tx_tready),
+    .ififo_wdata(dispatcher3_ififo_wdata),
+    .ififo_wen(dispatcher3_ififo_wen),
+    .ififo_rdy(dispatcher3_ififo_rdy)
+);
+
+wire [noc_dw-1:0] dispatcher0_data_dummy;
+// dispatcher router blocks: only the slave side carries traffic; master_tready is tied low and, of the master outputs, only master_tdata is connected (to dispatcherN_data_dummy)
+noc_router_adapter_block_inst noc_router_input_dispatcher0(
+    .clk(clk),
+    .reset(reset),
+    .master_tready(1'd0),
+    .master_tdata(dispatcher0_data_dummy),
+    .master_tvalid(),
+    .master_tstrb(),
+    .master_tkeep(),
+    .master_tid(),
+    .master_tdest(),
+    .master_tuser(),
+    .master_tlast(),
+    .slave_tvalid(dispatcher0_tx_valid),
+    .slave_tready(dispatcher0_connector_tx_tready),
+    .slave_tdata(dispatcher0_tx_tdata),
+    .slave_tstrb(dispatcher0_tx_tstrb),
+    .slave_tkeep(dispatcher0_tx_tkeep),
+    .slave_tid(dispatcher0_tx_tid),
+    .slave_tdest(dispatcher0_tx_tdest),
+    .slave_tuser(dispatcher0_tx_tuser),
+    .slave_tlast(dispatcher0_tx_tlast)
+
+);
+wire [noc_dw-1:0] dispatcher1_data_dummy;
+noc_router_adapter_block_inst noc_router_input_dispatcher1(
+    .clk(clk),
+    .reset(reset),
+    .master_tready(1'd0),
+    .master_tdata(dispatcher1_data_dummy),
+    .master_tvalid(),
+    .master_tstrb(),
+    .master_tkeep(),
+    .master_tid(),
+    .master_tdest(),
+    .master_tuser(),
+    .master_tlast(),
+    .slave_tvalid(dispatcher1_tx_valid),
+    .slave_tready(dispatcher1_connector_tx_tready),
+    .slave_tdata(dispatcher1_tx_tdata),
+    .slave_tstrb(dispatcher1_tx_tstrb),
+    .slave_tkeep(dispatcher1_tx_tkeep),
+    .slave_tid(dispatcher1_tx_tid),
+    .slave_tdest(dispatcher1_tx_tdest),
+    .slave_tuser(dispatcher1_tx_tuser),
+    .slave_tlast(dispatcher1_tx_tlast)
+
+);
+wire [noc_dw-1:0] 
dispatcher2_data_dummy;
+noc_router_adapter_block_inst noc_router_input_dispatcher2(
+    .clk(clk),
+    .reset(reset),
+    .master_tready(1'd0),
+    .master_tdata(dispatcher2_data_dummy),
+    .master_tvalid(),
+    .master_tstrb(),
+    .master_tkeep(),
+    .master_tid(),
+    .master_tdest(),
+    .master_tuser(),
+    .master_tlast(),
+    .slave_tvalid(dispatcher2_tx_valid),
+    .slave_tready(dispatcher2_connector_tx_tready),
+    .slave_tdata(dispatcher2_tx_tdata),
+    .slave_tstrb(dispatcher2_tx_tstrb),
+    .slave_tkeep(dispatcher2_tx_tkeep),
+    .slave_tid(dispatcher2_tx_tid),
+    .slave_tdest(dispatcher2_tx_tdest),
+    .slave_tuser(dispatcher2_tx_tuser),
+    .slave_tlast(dispatcher2_tx_tlast)
+
+);
+
+wire [noc_dw-1:0] dispatcher3_data_dummy;
+noc_router_adapter_block_inst noc_router_input_dispatcher3(
+    .clk(clk),
+    .reset(reset),
+    .master_tready(1'd0),
+    .master_tdata(dispatcher3_data_dummy),
+    .master_tvalid(),
+    .master_tstrb(),
+    .master_tkeep(),
+    .master_tid(),
+    .master_tdest(),
+    .master_tuser(),
+    .master_tlast(),
+    .slave_tvalid(dispatcher3_tx_valid),
+    .slave_tready(dispatcher3_connector_tx_tready),
+    .slave_tdata(dispatcher3_tx_tdata),
+    .slave_tstrb(dispatcher3_tx_tstrb),
+    .slave_tkeep(dispatcher3_tx_tkeep),
+    .slave_tid(dispatcher3_tx_tid),
+    .slave_tdest(dispatcher3_tx_tdest),
+    .slave_tuser(dispatcher3_tx_tuser),
+    .slave_tlast(dispatcher3_tx_tlast)
+
+);
+
+assign master_tdata_top_out = dispatcher0_data_dummy | dispatcher1_data_dummy | dispatcher2_data_dummy | dispatcher3_data_dummy; // OR of the four dispatcher routers' master_tdata outputs
+
+
+// mvm module declarations (layer 0): every rx_* input is the router-side wire OR-ed with the same-named rx_top_* top-level input. NOTE(review): presumably this keeps the rx_top_* pins connected to logic - confirm
+mvm_top layer0_mvm0(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer0_mvm0_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer0_mvm0_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer0_mvm0_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer0_mvm0_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer0_mvm0_rx_tid | rx_top_tid),
+    .rx_tdest(layer0_mvm0_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer0_mvm0_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer0_mvm0_rx_tlast | rx_top_tlast), // pairs with rx_top_tlast like every other rx_* port (was the 64-bit rx_top_tkeep, a copy-paste slip)
+    .rx_tready(layer0_mvm0_connector_rx_tready),
+    .tx_tvalid(layer0_mvm0_tx_valid),
+    .tx_tdata(layer0_mvm0_tx_tdata),
+    .tx_tstrb(layer0_mvm0_tx_tstrb),
+    .tx_tkeep(layer0_mvm0_tx_tkeep),
+    .tx_tid(layer0_mvm0_tx_tid),
+    .tx_tdest(layer0_mvm0_tx_tdest),
+    .tx_tuser(layer0_mvm0_tx_tuser),
+    .tx_tlast(layer0_mvm0_tx_tlast),
+    .tx_tready(layer0_mvm0_connector_tx_tready)
+
+);
+mvm_top layer0_mvm1(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer0_mvm1_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer0_mvm1_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer0_mvm1_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer0_mvm1_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer0_mvm1_rx_tid | rx_top_tid),
+    .rx_tdest(layer0_mvm1_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer0_mvm1_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer0_mvm1_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer0_mvm1_connector_rx_tready),
+    .tx_tvalid(layer0_mvm1_tx_valid),
+    .tx_tdata(layer0_mvm1_tx_tdata),
+    .tx_tstrb(layer0_mvm1_tx_tstrb),
+    .tx_tkeep(layer0_mvm1_tx_tkeep),
+    .tx_tid(layer0_mvm1_tx_tid),
+    .tx_tdest(layer0_mvm1_tx_tdest),
+    .tx_tuser(layer0_mvm1_tx_tuser),
+    .tx_tlast(layer0_mvm1_tx_tlast),
+    .tx_tready(layer0_mvm1_connector_tx_tready)
+
+);
+mvm_top layer0_mvm2(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer0_mvm2_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer0_mvm2_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer0_mvm2_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer0_mvm2_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer0_mvm2_rx_tid | rx_top_tid),
+    .rx_tdest(layer0_mvm2_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer0_mvm2_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer0_mvm2_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer0_mvm2_connector_rx_tready),
+    .tx_tvalid(layer0_mvm2_tx_valid),
+    .tx_tdata(layer0_mvm2_tx_tdata),
+    .tx_tstrb(layer0_mvm2_tx_tstrb),
+    .tx_tkeep(layer0_mvm2_tx_tkeep),
+    .tx_tid(layer0_mvm2_tx_tid),
+    .tx_tdest(layer0_mvm2_tx_tdest),
+    .tx_tuser(layer0_mvm2_tx_tuser),
+    .tx_tlast(layer0_mvm2_tx_tlast),
+    .tx_tready(layer0_mvm2_connector_tx_tready)
+
+);
+mvm_top layer0_mvm3(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer0_mvm3_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer0_mvm3_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer0_mvm3_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer0_mvm3_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer0_mvm3_rx_tid | rx_top_tid),
+    .rx_tdest(layer0_mvm3_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer0_mvm3_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer0_mvm3_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer0_mvm3_connector_rx_tready),
+    .tx_tvalid(layer0_mvm3_tx_valid),
+    .tx_tdata(layer0_mvm3_tx_tdata),
+    .tx_tstrb(layer0_mvm3_tx_tstrb),
+    .tx_tkeep(layer0_mvm3_tx_tkeep),
+    .tx_tid(layer0_mvm3_tx_tid),
+    .tx_tdest(layer0_mvm3_tx_tdest),
+    .tx_tuser(layer0_mvm3_tx_tuser),
+    .tx_tlast(layer0_mvm3_tx_tlast),
+    .tx_tready(layer0_mvm3_connector_tx_tready)
+
+);
+
+// mvm module declarations (layer 1)
+mvm_top layer1_mvm0(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer1_mvm0_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer1_mvm0_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer1_mvm0_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer1_mvm0_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer1_mvm0_rx_tid | rx_top_tid),
+    .rx_tdest(layer1_mvm0_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer1_mvm0_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer1_mvm0_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer1_mvm0_connector_rx_tready),
+    .tx_tvalid(layer1_mvm0_tx_valid),
+    .tx_tdata(layer1_mvm0_tx_tdata),
+    .tx_tstrb(layer1_mvm0_tx_tstrb),
+    .tx_tkeep(layer1_mvm0_tx_tkeep),
+    .tx_tid(layer1_mvm0_tx_tid),
+    .tx_tdest(layer1_mvm0_tx_tdest),
+    .tx_tuser(layer1_mvm0_tx_tuser),
+    .tx_tlast(layer1_mvm0_tx_tlast),
+    .tx_tready(layer1_mvm0_connector_tx_tready)
+
+);
+mvm_top layer1_mvm1(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer1_mvm1_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer1_mvm1_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer1_mvm1_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer1_mvm1_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer1_mvm1_rx_tid | rx_top_tid),
+    .rx_tdest(layer1_mvm1_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer1_mvm1_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer1_mvm1_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer1_mvm1_connector_rx_tready),
+    .tx_tvalid(layer1_mvm1_tx_valid),
+    .tx_tdata(layer1_mvm1_tx_tdata),
+    .tx_tstrb(layer1_mvm1_tx_tstrb),
+    .tx_tkeep(layer1_mvm1_tx_tkeep),
+    .tx_tid(layer1_mvm1_tx_tid),
+    .tx_tdest(layer1_mvm1_tx_tdest),
+    .tx_tuser(layer1_mvm1_tx_tuser),
+    .tx_tlast(layer1_mvm1_tx_tlast),
+    .tx_tready(layer1_mvm1_connector_tx_tready)
+
+);
+mvm_top layer1_mvm2(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer1_mvm2_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer1_mvm2_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer1_mvm2_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer1_mvm2_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer1_mvm2_rx_tid | rx_top_tid),
+    .rx_tdest(layer1_mvm2_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer1_mvm2_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer1_mvm2_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer1_mvm2_connector_rx_tready),
+    .tx_tvalid(layer1_mvm2_tx_valid),
+    .tx_tdata(layer1_mvm2_tx_tdata),
+    .tx_tstrb(layer1_mvm2_tx_tstrb),
+    .tx_tkeep(layer1_mvm2_tx_tkeep),
+    .tx_tid(layer1_mvm2_tx_tid),
+    .tx_tdest(layer1_mvm2_tx_tdest),
+    .tx_tuser(layer1_mvm2_tx_tuser),
+    .tx_tlast(layer1_mvm2_tx_tlast),
+    .tx_tready(layer1_mvm2_connector_tx_tready)
+
+);
+
+
+// mvm module declarations (layer 2)
+mvm_top layer2_mvm0(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer2_mvm0_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer2_mvm0_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer2_mvm0_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer2_mvm0_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer2_mvm0_rx_tid | rx_top_tid),
+    .rx_tdest(layer2_mvm0_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer2_mvm0_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer2_mvm0_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer2_mvm0_connector_rx_tready),
+    .tx_tvalid(layer2_mvm0_tx_valid),
+    .tx_tdata(layer2_mvm0_tx_tdata),
+    .tx_tstrb(layer2_mvm0_tx_tstrb),
+    .tx_tkeep(layer2_mvm0_tx_tkeep),
+    .tx_tid(layer2_mvm0_tx_tid),
+    .tx_tdest(layer2_mvm0_tx_tdest),
+    .tx_tuser(layer2_mvm0_tx_tuser),
+    .tx_tlast(layer2_mvm0_tx_tlast),
+    .tx_tready(layer2_mvm0_connector_tx_tready)
+
+);
+mvm_top layer2_mvm1(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer2_mvm1_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer2_mvm1_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer2_mvm1_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer2_mvm1_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer2_mvm1_rx_tid | rx_top_tid),
+    .rx_tdest(layer2_mvm1_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer2_mvm1_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer2_mvm1_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer2_mvm1_connector_rx_tready),
+    .tx_tvalid(layer2_mvm1_tx_valid),
+    .tx_tdata(layer2_mvm1_tx_tdata),
+    .tx_tstrb(layer2_mvm1_tx_tstrb),
+    .tx_tkeep(layer2_mvm1_tx_tkeep),
+    .tx_tid(layer2_mvm1_tx_tid),
+    .tx_tdest(layer2_mvm1_tx_tdest),
+    .tx_tuser(layer2_mvm1_tx_tuser),
+    .tx_tlast(layer2_mvm1_tx_tlast),
+    .tx_tready(layer2_mvm1_connector_tx_tready)
+
+);
+
+// mvm module declarations (layer 3)
+mvm_top layer3_mvm0(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer3_mvm0_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer3_mvm0_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer3_mvm0_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer3_mvm0_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer3_mvm0_rx_tid | rx_top_tid),
+    .rx_tdest(layer3_mvm0_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer3_mvm0_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer3_mvm0_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer3_mvm0_connector_rx_tready),
+    .tx_tvalid(layer3_mvm0_tx_valid),
+    .tx_tdata(layer3_mvm0_tx_tdata),
+    .tx_tstrb(layer3_mvm0_tx_tstrb),
+    .tx_tkeep(layer3_mvm0_tx_tkeep),
+    .tx_tid(layer3_mvm0_tx_tid),
+    .tx_tdest(layer3_mvm0_tx_tdest),
+    .tx_tuser(layer3_mvm0_tx_tuser),
+    .tx_tlast(layer3_mvm0_tx_tlast),
+    .tx_tready(layer3_mvm0_connector_tx_tready)
+
+);
+mvm_top layer3_mvm1(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(layer3_mvm1_rx_valid | rx_top_tvalid),
+    .rx_tdata(layer3_mvm1_rx_tdata | rx_top_tdata),
+    .rx_tstrb(layer3_mvm1_rx_tstrb | rx_top_tstrb),
+    .rx_tkeep(layer3_mvm1_rx_tkeep | rx_top_tkeep),
+    .rx_tid(layer3_mvm1_rx_tid | rx_top_tid),
+    .rx_tdest(layer3_mvm1_rx_tdest | rx_top_tdest),
+    .rx_tuser(layer3_mvm1_rx_tuser | rx_top_tuser),
+    .rx_tlast(layer3_mvm1_rx_tlast | rx_top_tlast), // was rx_top_tkeep
+    .rx_tready(layer3_mvm1_connector_rx_tready),
+    .tx_tvalid(layer3_mvm1_tx_valid),
+    .tx_tdata(layer3_mvm1_tx_tdata),
+    .tx_tstrb(layer3_mvm1_tx_tstrb),
+    .tx_tkeep(layer3_mvm1_tx_tkeep),
+    .tx_tid(layer3_mvm1_tx_tid),
+    .tx_tdest(layer3_mvm1_tx_tdest),
+    .tx_tuser(layer3_mvm1_tx_tuser),
+    .tx_tlast(layer3_mvm1_tx_tlast),
+    .tx_tready(layer3_mvm1_connector_tx_tready)
+
+);
+
+
+/*mvm matrix_vector_unit(
+    .clk(clk),
+    .rst(reset),
+    .rx_tvalid(dp_tx_valid),
+    .rx_tdata(dp_tx_tdata),
+    .rx_tstrb(dp_tx_tstrb),
+    .rx_tkeep(dp_tx_tkeep),
+    .rx_tid(dp_tx_tid),
+    .rx_tdest(dp_tx_tdest),
+    .rx_tuser(dp_tx_tuser),
+    .rx_tlast(mvm_rx_tlast),
+    .rx_tready(dp_connector_tx_tready),
+    .tx_tvalid(ct_rx_valid),
+    .tx_tdata(ct_rx_tdata),
+    .tx_tstrb(ct_rx_tstrb),
+    .tx_tkeep(ct_rx_tkeep),
+    .tx_tid(ct_rx_tid),
+    .tx_tdest(ct_rx_tdest),
+    .tx_tuser(ct_rx_tuser),
+    .tx_tlast(ct_rx_tlast),
+    .tx_tready(ct_connector_rx_tready)
+
+);*/
+
+
+// layer 0 router blocks
+noc_router_adapter_block_inst noc_router_layer0_mvm0(
+    .clk(clk),
+    .reset(reset),
+    .master_tready(layer0_mvm0_connector_rx_tready),
+    .master_tdata(layer0_mvm0_rx_tdata),
+    .master_tvalid(layer0_mvm0_rx_valid),
+    .master_tstrb(layer0_mvm0_rx_tstrb),
+    .master_tkeep(layer0_mvm0_rx_tkeep),
+    .master_tid(layer0_mvm0_rx_tid),
+    .master_tdest(layer0_mvm0_rx_tdest),
+    .master_tuser(layer0_mvm0_rx_tuser),
+    .master_tlast(layer0_mvm0_rx_tlast),
+    .slave_tvalid(layer0_mvm0_tx_valid),
+    .slave_tready(layer0_mvm0_connector_tx_tready),
+    .slave_tdata(layer0_mvm0_tx_tdata),
+    .slave_tstrb(layer0_mvm0_tx_tstrb),
+    
.slave_tkeep(layer0_mvm0_tx_tkeep), + .slave_tid(layer0_mvm0_tx_tid), + .slave_tdest(layer0_mvm0_tx_tdest), + .slave_tuser(layer0_mvm0_tx_tuser), + .slave_tlast(layer0_mvm0_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer0_mvm1( + .clk(clk), + .reset(reset), + .master_tready(layer0_mvm1_connector_rx_tready), + .master_tdata(layer0_mvm1_rx_tdata), + .master_tvalid(layer0_mvm1_rx_valid), + .master_tstrb(layer0_mvm1_rx_tstrb), + .master_tkeep(layer0_mvm1_rx_tkeep), + .master_tid(layer0_mvm1_rx_tid), + .master_tdest(layer0_mvm1_rx_tdest), + .master_tuser(layer0_mvm1_rx_tuser), + .master_tlast(layer0_mvm1_rx_tlast), + .slave_tvalid(layer0_mvm1_tx_valid), + .slave_tready(layer0_mvm1_connector_tx_tready), + .slave_tdata(layer0_mvm1_tx_tdata), + .slave_tstrb(layer0_mvm1_tx_tstrb), + .slave_tkeep(layer0_mvm1_tx_tkeep), + .slave_tid(layer0_mvm1_tx_tid), + .slave_tdest(layer0_mvm1_tx_tdest), + .slave_tuser(layer0_mvm1_tx_tuser), + .slave_tlast(layer0_mvm1_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer0_mvm2( + .clk(clk), + .reset(reset), + .master_tready(layer0_mvm2_connector_rx_tready), + .master_tdata(layer0_mvm2_rx_tdata), + .master_tvalid(layer0_mvm2_rx_valid), + .master_tstrb(layer0_mvm2_rx_tstrb), + .master_tkeep(layer0_mvm2_rx_tkeep), + .master_tid(layer0_mvm2_rx_tid), + .master_tdest(layer0_mvm2_rx_tdest), + .master_tuser(layer0_mvm2_rx_tuser), + .master_tlast(layer0_mvm2_rx_tlast), + .slave_tvalid(layer0_mvm2_tx_valid), + .slave_tready(layer0_mvm2_connector_tx_tready), + .slave_tdata(layer0_mvm2_tx_tdata), + .slave_tstrb(layer0_mvm2_tx_tstrb), + .slave_tkeep(layer0_mvm2_tx_tkeep), + .slave_tid(layer0_mvm2_tx_tid), + .slave_tdest(layer0_mvm2_tx_tdest), + .slave_tuser(layer0_mvm2_tx_tuser), + .slave_tlast(layer0_mvm2_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer0_mvm3( + .clk(clk), + .reset(reset), + .master_tready(layer0_mvm3_connector_rx_tready), + .master_tdata(layer0_mvm3_rx_tdata), + .master_tvalid(layer0_mvm3_rx_valid), 
+ .master_tstrb(layer0_mvm3_rx_tstrb), + .master_tkeep(layer0_mvm3_rx_tkeep), + .master_tid(layer0_mvm3_rx_tid), + .master_tdest(layer0_mvm3_rx_tdest), + .master_tuser(layer0_mvm3_rx_tuser), + .master_tlast(layer0_mvm3_rx_tlast), + .slave_tvalid(layer0_mvm3_tx_valid), + .slave_tready(layer0_mvm3_connector_tx_tready), + .slave_tdata(layer0_mvm3_tx_tdata), + .slave_tstrb(layer0_mvm3_tx_tstrb), + .slave_tkeep(layer0_mvm3_tx_tkeep), + .slave_tid(layer0_mvm3_tx_tid), + .slave_tdest(layer0_mvm3_tx_tdest), + .slave_tuser(layer0_mvm3_tx_tuser), + .slave_tlast(layer0_mvm3_tx_tlast) +); + +// layer 1 router blocks +noc_router_adapter_block_inst noc_router_layer1_mvm0( + .clk(clk), + .reset(reset), + .master_tready(layer1_mvm0_connector_rx_tready), + .master_tdata(layer1_mvm0_rx_tdata), + .master_tvalid(layer1_mvm0_rx_valid), + .master_tstrb(layer1_mvm0_rx_tstrb), + .master_tkeep(layer1_mvm0_rx_tkeep), + .master_tid(layer1_mvm0_rx_tid), + .master_tdest(layer1_mvm0_rx_tdest), + .master_tuser(layer1_mvm0_rx_tuser), + .master_tlast(layer1_mvm0_rx_tlast), + .slave_tvalid(layer1_mvm0_tx_valid), + .slave_tready(layer1_mvm0_connector_tx_tready), + .slave_tdata(layer1_mvm0_tx_tdata), + .slave_tstrb(layer1_mvm0_tx_tstrb), + .slave_tkeep(layer1_mvm0_tx_tkeep), + .slave_tid(layer1_mvm0_tx_tid), + .slave_tdest(layer1_mvm0_tx_tdest), + .slave_tuser(layer1_mvm0_tx_tuser), + .slave_tlast(layer1_mvm0_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer1_mvm1( + .clk(clk), + .reset(reset), + .master_tready(layer1_mvm1_connector_rx_tready), + .master_tdata(layer1_mvm1_rx_tdata), + .master_tvalid(layer1_mvm1_rx_valid), + .master_tstrb(layer1_mvm1_rx_tstrb), + .master_tkeep(layer1_mvm1_rx_tkeep), + .master_tid(layer1_mvm1_rx_tid), + .master_tdest(layer1_mvm1_rx_tdest), + .master_tuser(layer1_mvm1_rx_tuser), + .master_tlast(layer1_mvm1_rx_tlast), + .slave_tvalid(layer1_mvm1_tx_valid), + .slave_tready(layer1_mvm1_connector_tx_tready), + .slave_tdata(layer1_mvm1_tx_tdata), + 
.slave_tstrb(layer1_mvm1_tx_tstrb), + .slave_tkeep(layer1_mvm1_tx_tkeep), + .slave_tid(layer1_mvm1_tx_tid), + .slave_tdest(layer1_mvm1_tx_tdest), + .slave_tuser(layer1_mvm1_tx_tuser), + .slave_tlast(layer1_mvm1_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer1_mvm2( + .clk(clk), + .reset(reset), + .master_tready(layer1_mvm2_connector_rx_tready), + .master_tdata(layer1_mvm2_rx_tdata), + .master_tvalid(layer1_mvm2_rx_valid), + .master_tstrb(layer1_mvm2_rx_tstrb), + .master_tkeep(layer1_mvm2_rx_tkeep), + .master_tid(layer1_mvm2_rx_tid), + .master_tdest(layer1_mvm2_rx_tdest), + .master_tuser(layer1_mvm2_rx_tuser), + .master_tlast(layer1_mvm2_rx_tlast), + .slave_tvalid(layer1_mvm2_tx_valid), + .slave_tready(layer1_mvm2_connector_tx_tready), + .slave_tdata(layer1_mvm2_tx_tdata), + .slave_tstrb(layer1_mvm2_tx_tstrb), + .slave_tkeep(layer1_mvm2_tx_tkeep), + .slave_tid(layer1_mvm2_tx_tid), + .slave_tdest(layer1_mvm2_tx_tdest), + .slave_tuser(layer1_mvm2_tx_tuser), + .slave_tlast(layer1_mvm2_tx_tlast) +); + + +// layer 2 router blocks +noc_router_adapter_block_inst noc_router_layer2_mvm0( + .clk(clk), + .reset(reset), + .master_tready(layer2_mvm0_connector_rx_tready), + .master_tdata(layer2_mvm0_rx_tdata), + .master_tvalid(layer2_mvm0_rx_valid), + .master_tstrb(layer2_mvm0_rx_tstrb), + .master_tkeep(layer2_mvm0_rx_tkeep), + .master_tid(layer2_mvm0_rx_tid), + .master_tdest(layer2_mvm0_rx_tdest), + .master_tuser(layer2_mvm0_rx_tuser), + .master_tlast(layer2_mvm0_rx_tlast), + .slave_tvalid(layer2_mvm0_tx_valid), + .slave_tready(layer2_mvm0_connector_tx_tready), + .slave_tdata(layer2_mvm0_tx_tdata), + .slave_tstrb(layer2_mvm0_tx_tstrb), + .slave_tkeep(layer2_mvm0_tx_tkeep), + .slave_tid(layer2_mvm0_tx_tid), + .slave_tdest(layer2_mvm0_tx_tdest), + .slave_tuser(layer2_mvm0_tx_tuser), + .slave_tlast(layer2_mvm0_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer2_mvm1( + .clk(clk), + .reset(reset), + .master_tready(layer2_mvm1_connector_rx_tready), + 
.master_tdata(layer2_mvm1_rx_tdata), + .master_tvalid(layer2_mvm1_rx_valid), + .master_tstrb(layer2_mvm1_rx_tstrb), + .master_tkeep(layer2_mvm1_rx_tkeep), + .master_tid(layer2_mvm1_rx_tid), + .master_tdest(layer2_mvm1_rx_tdest), + .master_tuser(layer2_mvm1_rx_tuser), + .master_tlast(layer2_mvm1_rx_tlast), + .slave_tvalid(layer2_mvm1_tx_valid), + .slave_tready(layer2_mvm1_connector_tx_tready), + .slave_tdata(layer2_mvm1_tx_tdata), + .slave_tstrb(layer2_mvm1_tx_tstrb), + .slave_tkeep(layer2_mvm1_tx_tkeep), + .slave_tid(layer2_mvm1_tx_tid), + .slave_tdest(layer2_mvm1_tx_tdest), + .slave_tuser(layer2_mvm1_tx_tuser), + .slave_tlast(layer2_mvm1_tx_tlast) +); + +// layer 3 router blocks +noc_router_adapter_block_inst noc_router_layer3_mvm0( + .clk(clk), + .reset(reset), + .master_tready(layer3_mvm0_connector_rx_tready), + .master_tdata(layer3_mvm0_rx_tdata), + .master_tvalid(layer3_mvm0_rx_valid), + .master_tstrb(layer3_mvm0_rx_tstrb), + .master_tkeep(layer3_mvm0_rx_tkeep), + .master_tid(layer3_mvm0_rx_tid), + .master_tdest(layer3_mvm0_rx_tdest), + .master_tuser(layer3_mvm0_rx_tuser), + .master_tlast(layer3_mvm0_rx_tlast), + .slave_tvalid(layer3_mvm0_tx_valid), + .slave_tready(layer3_mvm0_connector_tx_tready), + .slave_tdata(layer3_mvm0_tx_tdata), + .slave_tstrb(layer3_mvm0_tx_tstrb), + .slave_tkeep(layer3_mvm0_tx_tkeep), + .slave_tid(layer3_mvm0_tx_tid), + .slave_tdest(layer3_mvm0_tx_tdest), + .slave_tuser(layer3_mvm0_tx_tuser), + .slave_tlast(layer3_mvm0_tx_tlast) +); +noc_router_adapter_block_inst noc_router_layer3_mvm1( + .clk(clk), + .reset(reset), + .master_tready(layer3_mvm1_connector_rx_tready), + .master_tdata(layer3_mvm1_rx_tdata), + .master_tvalid(layer3_mvm1_rx_valid), + .master_tstrb(layer3_mvm1_rx_tstrb), + .master_tkeep(layer3_mvm1_rx_tkeep), + .master_tid(layer3_mvm1_rx_tid), + .master_tdest(layer3_mvm1_rx_tdest), + .master_tuser(layer3_mvm1_rx_tuser), + .master_tlast(layer3_mvm1_rx_tlast), + .slave_tvalid(layer3_mvm1_tx_valid), + 
.slave_tready(layer3_mvm1_connector_tx_tready), + .slave_tdata(layer3_mvm1_tx_tdata), + .slave_tstrb(layer3_mvm1_tx_tstrb), + .slave_tkeep(layer3_mvm1_tx_tkeep), + .slave_tid(layer3_mvm1_tx_tid), + .slave_tdest(layer3_mvm1_tx_tdest), + .slave_tuser(layer3_mvm1_tx_tuser), + .slave_tlast(layer3_mvm1_tx_tlast) +); + +// collector router block +noc_router_adapter_block_inst noc_router_output_collector( + .clk(clk), + .reset(reset), + .master_tready(collector_connector_rx_tready), + .master_tdata(collector_rx_tdata), + .master_tvalid(collector_rx_valid), + .master_tstrb(collector_rx_tstrb), + .master_tkeep(collector_rx_tkeep), + .master_tid(collector_rx_tid), + .master_tdest(collector_rx_tdest), + .master_tuser(collector_rx_tuser), + .master_tlast(collector_rx_tlast), + .slave_tvalid(1'd0), + .slave_tready(), + .slave_tdata(32'd0), + .slave_tstrb(8'd0), + .slave_tkeep(8'd0), + .slave_tid(8'd0), + .slave_tdest(8'd0), + .slave_tuser(8'd0), + .slave_tlast(1'd0) +); + +// collector unit +collector collector( + .clk(clk), + .rst(reset), + .rx_tvalid(collector_rx_valid), + .rx_tdata(collector_rx_tdata), + .rx_tstrb(collector_rx_tstrb), + .rx_tkeep(collector_rx_tkeep), + .rx_tid(collector_rx_tid), + .rx_tdest(collector_rx_tdest), + .rx_tuser(collector_rx_tuser), + .rx_tlast(collector_rx_tlast), + .rx_tready(collector_connector_rx_tready), + .ofifo_rdata(collector_ofifo_rdata), + .ofifo_ren(collector_ofifo_ren), + .ofifo_rdy(collector_ofifo_rdy) +); + +wire tvalid_temp_layer0, tvalid_temp_layer1, tvalid_temp_layer2, tvalid_temp_layer3; + +assign tvalid_temp_layer0 = layer0_mvm0_tx_valid | layer0_mvm1_tx_valid | layer0_mvm2_tx_valid | layer0_mvm3_tx_valid; +assign tvalid_temp_layer1 = layer1_mvm0_tx_valid | layer1_mvm1_tx_valid | layer1_mvm2_tx_valid; +assign tvalid_temp_layer2 = layer2_mvm0_tx_valid | layer2_mvm1_tx_valid; +assign tvalid_temp_layer3 = layer3_mvm0_tx_valid | layer3_mvm1_tx_valid; +assign tx_top_tvalid = tvalid_temp_layer0 | tvalid_temp_layer1 | 
tvalid_temp_layer2 | tvalid_temp_layer3; + +wire [511:0] tdata_temp_layer0, tdata_temp_layer1, tdata_temp_layer2, tdata_temp_layer3; + +assign tdata_temp_layer0 = layer0_mvm0_tx_tdata | layer0_mvm1_tx_tdata | layer0_mvm2_tx_tdata | layer0_mvm3_tx_tdata; +assign tdata_temp_layer1 = layer1_mvm0_tx_tdata | layer1_mvm1_tx_tdata | layer1_mvm2_tx_tdata; +assign tdata_temp_layer2 = layer2_mvm0_tx_tdata | layer2_mvm1_tx_tdata; +assign tdata_temp_layer3 = layer3_mvm0_tx_tdata | layer3_mvm1_tx_tdata; +assign tx_top_tdata = tdata_temp_layer0 | tdata_temp_layer1 | tdata_temp_layer2 | tdata_temp_layer3; + +wire [63:0] tstrb_temp_layer0, tstrb_temp_layer1, tstrb_temp_layer2, tstrb_temp_layer3; +assign tstrb_temp_layer0 = layer0_mvm0_tx_tstrb | layer0_mvm1_tx_tstrb | layer0_mvm2_tx_tstrb | layer0_mvm3_tx_tstrb; +assign tstrb_temp_layer1 = layer1_mvm0_tx_tstrb | layer1_mvm1_tx_tstrb | layer1_mvm2_tx_tstrb; +assign tstrb_temp_layer2 = layer2_mvm0_tx_tstrb | layer2_mvm1_tx_tstrb; +assign tstrb_temp_layer3 = layer3_mvm0_tx_tstrb | layer3_mvm1_tx_tstrb; +assign tx_top_tstrb = tstrb_temp_layer0 | tstrb_temp_layer1 | tstrb_temp_layer2 | tstrb_temp_layer3; + +wire [63:0] tkeep_temp_layer0, tkeep_temp_layer1, tkeep_temp_layer2, tkeep_temp_layer3; +assign tkeep_temp_layer0 = layer0_mvm0_tx_tkeep | layer0_mvm1_tx_tkeep | layer0_mvm2_tx_tkeep | layer0_mvm3_tx_tkeep; +assign tkeep_temp_layer1 = layer1_mvm0_tx_tkeep | layer1_mvm1_tx_tkeep | layer1_mvm2_tx_tkeep; +assign tkeep_temp_layer2 = layer2_mvm0_tx_tkeep | layer2_mvm1_tx_tkeep; +assign tkeep_temp_layer3 = layer3_mvm0_tx_tkeep | layer3_mvm1_tx_tkeep; +assign tx_top_tkeep = tkeep_temp_layer0 | tkeep_temp_layer1 | tkeep_temp_layer2 | tkeep_temp_layer3; + +wire [7:0] tid_temp_layer0, tid_temp_layer1, tid_temp_layer2, tid_temp_layer3; +assign tid_temp_layer0 = layer0_mvm0_tx_tid | layer0_mvm1_tx_tid | layer0_mvm2_tx_tid | layer0_mvm3_tx_tid; +assign tid_temp_layer1 = layer1_mvm0_tx_tid | layer1_mvm1_tx_tid | layer1_mvm2_tx_tid; +assign 
tid_temp_layer2 = layer2_mvm0_tx_tid | layer2_mvm1_tx_tid;
+assign tid_temp_layer3 = layer3_mvm0_tx_tid | layer3_mvm1_tx_tid;
+assign tx_top_tid = tid_temp_layer0 | tid_temp_layer1 | tid_temp_layer2 | tid_temp_layer3;
+
+wire [7:0] tdest_temp_layer0, tdest_temp_layer1, tdest_temp_layer2, tdest_temp_layer3;
+assign tdest_temp_layer0 = layer0_mvm0_tx_tdest | layer0_mvm1_tx_tdest | layer0_mvm2_tx_tdest | layer0_mvm3_tx_tdest;
+assign tdest_temp_layer1 = layer1_mvm0_tx_tdest | layer1_mvm1_tx_tdest | layer1_mvm2_tx_tdest;
+assign tdest_temp_layer2 = layer2_mvm0_tx_tdest | layer2_mvm1_tx_tdest;
+assign tdest_temp_layer3 = layer3_mvm0_tx_tdest | layer3_mvm1_tx_tdest;
+assign tx_top_tdest = tdest_temp_layer0 | tdest_temp_layer1 | tdest_temp_layer2 | tdest_temp_layer3;
+
+wire [31:0] tuser_temp_layer0, tuser_temp_layer1, tuser_temp_layer2, tuser_temp_layer3;
+assign tuser_temp_layer0 = layer0_mvm0_tx_tuser | layer0_mvm1_tx_tuser | layer0_mvm2_tx_tuser | layer0_mvm3_tx_tuser;
+assign tuser_temp_layer1 = layer1_mvm0_tx_tuser | layer1_mvm1_tx_tuser | layer1_mvm2_tx_tuser;
+assign tuser_temp_layer2 = layer2_mvm0_tx_tuser | layer2_mvm1_tx_tuser;
+assign tuser_temp_layer3 = layer3_mvm0_tx_tuser | layer3_mvm1_tx_tuser;
+assign tx_top_tuser = tuser_temp_layer0 | tuser_temp_layer1 | tuser_temp_layer2 | tuser_temp_layer3;
+
+wire tlast_temp_layer0, tlast_temp_layer1, tlast_temp_layer2, tlast_temp_layer3;
+assign tlast_temp_layer0 = layer0_mvm0_tx_tlast | layer0_mvm1_tx_tlast | layer0_mvm2_tx_tlast | layer0_mvm3_tx_tlast;
+assign tlast_temp_layer1 = layer1_mvm0_tx_tlast | layer1_mvm1_tx_tlast | layer1_mvm2_tx_tlast;
+assign tlast_temp_layer2 = layer2_mvm0_tx_tlast | layer2_mvm1_tx_tlast;
+assign tlast_temp_layer3 = layer3_mvm0_tx_tlast | layer3_mvm1_tx_tlast;
+assign tx_top_tlast = tlast_temp_layer0 | tlast_temp_layer1 | tlast_temp_layer2 | tlast_temp_layer3;
+
+endmodule
+
+/*
+ * Simplified collector used for FPT'23.
+ *
+ * Receives a 512-bit stream word on the Rx interface, stores it in a
+ * fifo_collector instance, and presents the word at the FIFO output as a
+ * 64-bit value in which bit i is the XOR of FIFO output bits [8*i+7 : 8*i].
+ *
+ * Only rx_tvalid and rx_tdata are consumed; the remaining Rx sideband inputs
+ * (rx_tstrb, rx_tkeep, rx_tid, rx_tdest, rx_tuser, rx_tlast) are declared
+ * but not read inside this module.
+ *
+ * Handshake:
+ *   rx_tready - high while the FIFO is not full; a word is pushed on
+ *               rx_tvalid && rx_tready.
+ *   ofifo_rdy - high while the FIFO is not empty; ofifo_ren pops a word.
+ */
+
+module collector (
+    clk,
+    rst,
+    rx_tvalid,
+    rx_tdata,
+    rx_tstrb,
+    rx_tkeep,
+    rx_tid,
+    rx_tdest,
+    rx_tuser,
+    rx_tlast,
+    rx_tready,
+    ofifo_rdata,
+    ofifo_ren,
+    ofifo_rdy
+);
+
+input clk;
+input rst;
+// Rx interface
+input rx_tvalid;
+input [511:0] rx_tdata;
+input [63:0] rx_tstrb;
+input [63:0] rx_tkeep;
+input [7:0] rx_tid;
+input [7:0] rx_tdest;
+input [31:0] rx_tuser;
+input rx_tlast;
+output rx_tready;
+// External FIFO IO
+output [63:0] ofifo_rdata;
+input ofifo_ren;
+output ofifo_rdy;
+
+// A word enters the FIFO only on a completed Rx handshake.
+wire fifo_push;
+assign fifo_push = rx_tvalid && rx_tready;
+wire fifo_full_signal, fifo_empty_signal;
+wire [511:0] fifo_rdata;
+
+// Fold every byte of the 512-bit FIFO word into a single output bit:
+// ofifo_rdata[i] = fifo_rdata[8*i] ^ fifo_rdata[8*i+1] ^ ... ^ fifo_rdata[8*i+7].
+genvar fold_idx;
+generate
+    for (fold_idx = 0; fold_idx < 64; fold_idx = fold_idx + 1) begin : gen_ofifo_fold
+        assign ofifo_rdata[fold_idx] = ^fifo_rdata[8*fold_idx +: 8];
+    end
+endgenerate
+
+
+fifo_collector fifo_inst (
+    .clk(clk),
+    .rst(rst),
+    .push(fifo_push),
+    .idata(rx_tdata),
+    .pop(ofifo_ren),
+    .odata(fifo_rdata),
+    .empty(fifo_empty_signal),
+    .full(fifo_full_signal)
+);
+
+// The write side may accept data while there is room; the read side has data
+// while the FIFO is non-empty. (Driving rx_tready from !empty would keep
+// fifo_push at 0 forever, because the FIFO is empty after reset.)
+assign rx_tready = !fifo_full_signal;
+assign ofifo_rdy = !fifo_empty_signal;
+
+endmodule
+
+/*
+ * fifo_collector: 512-bit wide FIFO instantiated by collector. The data word
+ * is stored as 32-bit slices in bram_inst instances that share tail_ptr as the
+ * write address and head_ptr as the read address.
+ */
+module fifo_collector (
+    clk,
+    rst,
+    push,
+    idata,
+    pop,
+    odata,
+    empty,
+    full
+);
+
+input wire clk;
+input wire rst;
+input wire push;
+input wire [511:0] idata;
+input wire pop;
+output wire [511:0] odata;
+output reg empty;
+output reg full;
+
+reg [8:0] head_ptr;
+reg [8:0] tail_ptr;
+
+bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr),
.raddr(head_ptr), .rdata(odata[(32*1)-1:32*0]));
+bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1]));
+bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2]));
+bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3]));
+bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4]));
+bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5]));
+bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6]));
+bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7]));
+bram_inst b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8]));
+bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9]));
+bram_inst b10 (.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10]));
+bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11]));
+bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12]));
+bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13]));
+bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14]));
+bram_inst b15 (.clk(clk), .wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15]));
+
+// Pointer update: tail_ptr advances on every push, head_ptr on every pop.
+// Neither is gated by full/empty here, so the instantiating module is
+// responsible for honouring those flags. Both pointers wrap at 9 bits.
+always @ (posedge clk) begin
+    if (rst) begin
+        head_ptr <= 0;
+        tail_ptr <= 0;
+    end else begin
+        if (push) tail_ptr <= tail_ptr + 1;
+        if (pop) head_ptr <= head_ptr + 1;
+    end
+end
+
+// Status flags. The FIFO is empty when both pointers are equal and full when
+// the write pointer is one position behind the read pointer (modulo 512).
+always @ (*) begin
+    if (tail_ptr == head_ptr) begin
+        empty = 1'b1;
+    end else begin
+        empty = 1'b0;
+    end
+
+    // The increment must be sized to 9 bits: with an unsized literal the sum
+    // is computed at 32 bits, so tail_ptr == 511 yields 512 instead of
+    // wrapping to 0 and full is missed when head_ptr == 0.
+    if ((tail_ptr + 9'd1) == head_ptr) begin
+        full = 1'b1;
+    end else begin
+        full = 1'b0;
+    end
+end
+
+endmodule
+
+
+/* Simplified dispatcher used for FPT'23 */
+
+module dispatcher (
+    clk,
+    rst,
+    tx_tvalid,
+    tx_tdata,
+    tx_tstrb,
+    tx_tkeep,
+    tx_tid,
+    tx_tdest,
+    tx_tuser,
+    tx_tlast,
+    tx_tready,
+    ififo_wdata,
+    ififo_wen,
+    ififo_rdy
+);
+
+input clk;
+input rst;
+// Tx interface
+output tx_tvalid;
+output [511:0] tx_tdata;
+output [63:0] tx_tstrb;
+output [63:0] tx_tkeep;
+output [7:0] tx_tid;
+output [7:0] tx_tdest;
+output [31:0] tx_tuser;
+output tx_tlast;
+input tx_tready;
+// External FIFO IO
+input [63:0] ififo_wdata;
+input ififo_wen;
+output ififo_rdy;
+
+wire fifo_full_signal, fifo_almost_full_signal, fifo_empty_signal;
+wire [511:0] fifo_rdata;
+wire [511:0] fifo_wdata;
+assign fifo_wdata[ 63: 0] = ififo_wdata;
+assign fifo_wdata[127: 64] = ififo_wdata;
+assign fifo_wdata[191:128] = ififo_wdata;
+assign fifo_wdata[255:192] = ififo_wdata;
+assign fifo_wdata[319:256] = ififo_wdata;
+assign fifo_wdata[383:320] = ififo_wdata;
+assign fifo_wdata[447:384] = ififo_wdata;
+assign fifo_wdata[511:448] = ififo_wdata;
+
+fifo_dispatcher fifo_inst (
+    .clk(clk),
+    .rst(rst),
+    .push(ififo_wen),
+    .idata(fifo_wdata),
+    .pop(tx_tvalid && tx_tready),
+    .odata(fifo_rdata),
+    .empty(fifo_empty_signal),
+    .full(fifo_full_signal)
+);
+
+reg [63:0] r_tx_tstrb;
+reg [63:0] r_tx_tkeep;
+reg [7:0] r_tx_tid;
+reg [7:0] r_tx_tdest;
+reg [31:0] r_tx_tuser;
+reg r_tx_tlast;
+
+always @ (posedge clk) begin
+    if (rst) begin
+        r_tx_tstrb <= 0;
+        r_tx_tkeep <= 0;
+        r_tx_tid <=
0; + r_tx_tdest <= 0; + r_tx_tuser <= 0; + r_tx_tlast <= 0; + end else begin + /* Sideband values are recomputed from ififo_wdata on every clock edge; this branch is not conditioned on ififo_wen. */ + r_tx_tstrb <= ififo_wdata[63:0]; + r_tx_tkeep <= ififo_wdata[63:0]; + r_tx_tid <= ififo_wdata[7:0] ^ ififo_wdata[15:8]; + r_tx_tdest <= ififo_wdata[7:0] ^ ififo_wdata[23:16]; + r_tx_tuser <= ififo_wdata[31:0] ^ ififo_wdata[63:32]; + r_tx_tlast <= ififo_wdata[63] ^ ififo_wdata[0]; + end +end + +/* The Tx sideband outputs are driven directly by the r_tx_* registers. */ +assign tx_tstrb = r_tx_tstrb; +assign tx_tkeep = r_tx_tkeep; +assign tx_tid = r_tx_tid; +assign tx_tdest = r_tx_tdest; +assign tx_tuser = r_tx_tuser; +assign tx_tlast = r_tx_tlast; + +/* tx_tvalid is high whenever the FIFO is not empty, tx_tdata is the FIFO read data, and ififo_rdy is high whenever the FIFO is not full. */ +assign tx_tvalid = !fifo_empty_signal; +assign tx_tdata = fifo_rdata; +assign ififo_rdy = !fifo_full_signal; + +endmodule + +/* fifo_dispatcher: 512-bit wide FIFO. The data word is stored as sixteen 32-bit slices, one per bram_inst instance; every slice is written at tail_ptr when push is high and read at head_ptr. NOTE(review): the 9-bit pointers suggest 512 entries per bram_inst - confirm against the bram_inst definition. */ +module fifo_dispatcher ( + clk, + rst, + push, + idata, + pop, + odata, + empty, + full +); + +input wire clk; +input wire rst; +input wire push; +input wire [511:0] idata; +input wire pop; +output wire [511:0] odata; +output reg empty; +output reg full; + +/* head_ptr is the read address and tail_ptr the write address shared by all slices. */ +reg [8:0] head_ptr; +reg [8:0] tail_ptr; + +bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), 
.waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7]));
+bram_inst b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8]));
+bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9]));
+bram_inst b10 (.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10]));
+bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11]));
+bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12]));
+bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13]));
+bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14]));
+bram_inst b15 (.clk(clk), .wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15]));
+
+// Pointer update: tail_ptr advances on every push, head_ptr on every pop.
+// Neither is gated by full/empty here, so the instantiating module is
+// responsible for honouring those flags. Both pointers wrap at 9 bits.
+always @ (posedge clk) begin
+    if (rst) begin
+        head_ptr <= 0;
+        tail_ptr <= 0;
+    end else begin
+        if (push) tail_ptr <= tail_ptr + 1;
+        if (pop) head_ptr <= head_ptr + 1;
+    end
+end
+
+// Status flags. The FIFO is empty when both pointers are equal and full when
+// the write pointer is one position behind the read pointer (modulo 512).
+always @ (*) begin
+    if (tail_ptr == head_ptr) begin
+        empty = 1'b1;
+    end else begin
+        empty = 1'b0;
+    end
+
+    // The increment must be sized to 9 bits: with an unsized literal the sum
+    // is computed at 32 bits, so tail_ptr == 511 yields 512 instead of
+    // wrapping to 0 and full is missed when head_ptr == 0.
+    if ((tail_ptr + 9'd1) == head_ptr) begin
+        full = 1'b1;
+    end else begin
+        full = 1'b0;
+    end
+end
+
+endmodule
+
+module mvm_top (
+    clk,
+    rst,
+    rx_tvalid,
+    rx_tdata,
+    rx_tstrb,
+    rx_tkeep,
+    rx_tid,
+    rx_tdest,
+    rx_tuser,
+    rx_tlast,
+    rx_tready,
+    tx_tvalid,
+    tx_tdata,
+    tx_tstrb,
+    tx_tkeep,
+    tx_tid,
+    tx_tdest,
+    tx_tuser,
+    tx_tlast,
+    tx_tready
+);
+
+input wire clk;
+input wire rst;
+// Rx interface
+input wire rx_tvalid;
+input wire [511:0] rx_tdata;
+input wire [ 63:0] rx_tstrb;
+input wire [ 63:0]
rx_tkeep; +input wire [ 7:0] rx_tid; +input wire [ 7:0] rx_tdest; +input wire [ 31:0] rx_tuser; +input wire rx_tlast; +output wire rx_tready; +// Tx interface +output wire tx_tvalid; +output wire [511:0] tx_tdata; +output wire [ 63:0] tx_tstrb; +output wire [ 63:0] tx_tkeep; +output wire [ 7:0] tx_tid; +output wire [ 7:0] tx_tdest; +output wire [ 31:0] tx_tuser; +output wire tx_tlast; +input wire tx_tready; + +// Hook up unused Rx signals to dummy registers to avoid being synthesized away +reg [63:0] dummy_rx_tstrb; +reg [63:0] dummy_rx_tkeep; +reg [63:0] dummy_rx_tdest; + +always @ (posedge clk) begin + dummy_rx_tstrb <= rx_tstrb; + dummy_rx_tkeep <= rx_tkeep; + dummy_rx_tdest <= rx_tdest; +end + +wire [8:0] inst_raddr; +assign inst_raddr = rx_tuser[15:9]; +wire [8:0] inst_waddr; +assign inst_waddr = rx_tuser[8:0]; +wire inst_wen; +assign inst_wen = (rx_tid == 0); +wire [511:0] inst_wdata; +assign inst_wdata = rx_tdata; +wire [511:0] inst_rdata; +wire [8:0] inst_rf_raddr, inst_accum_raddr; +wire inst_reduce, inst_accum_en, inst_release, inst_jump, inst_en, inst_last; + +memory_block instruction_fifo (.clk(clk), .waddr(inst_waddr), .wen(inst_wen), .wdata(inst_wdata), .raddr(inst_raddr), .rdata(inst_rdata)); + +assign inst_rf_raddr = inst_rdata[23:15]; +assign inst_accum_raddr = inst_rdata[14:6]; +assign inst_last = inst_rdata[5]; +assign inst_reduce = inst_rdata[2]; +assign inst_accum_en = inst_rdata[3]; +assign inst_release = inst_rdata[4]; +assign inst_jump = inst_rdata[1]; +assign inst_en = inst_rdata[0]; + +wire input_fifo_empty, input_fifo_full; +wire [511:0] input_fifo_idata; +assign input_fifo_idata = rx_tdata; +wire [511:0] input_fifo_odata; +wire input_fifo_push, input_fifo_pop; +assign input_fifo_push = (rx_tid == 2); +assign input_fifo_pop = inst_last; + +fifo_mvm input_fifo (.clk(clk), .rst(rst), .push(input_fifo_push), .idata(input_fifo_idata), .pop(input_fifo_pop), .odata(input_fifo_odata), .empty(input_fifo_empty), .full(input_fifo_full)); + +wire 
reduction_fifo_empty, reduction_fifo_full; +wire [511:0] reduction_fifo_idata; +assign reduction_fifo_idata = rx_tdata; +wire [511:0] reduction_fifo_odata; +wire reduction_fifo_push, reduction_fifo_pop; +assign reduction_fifo_push = (rx_tid == 1); +assign reduction_fifo_pop = inst_reduce && !reduction_fifo_empty; + +fifo_mvm reduction_fifo (.clk(clk), .rst(rst), .push(reduction_fifo_push), .idata(reduction_fifo_idata), .pop(reduction_fifo_pop), .odata(reduction_fifo_odata), .empty(reduction_fifo_empty), .full(reduction_fifo_full)); + +wire [8:0] accum_mem_waddr; +wire [511:0] accum_mem_rdata; +wire [17:0] temp_accum_addr, delay_accum_addr; +assign temp_accum_addr = {9'b0, inst_accum_raddr}; + +dpe_pipeline accum_addr_pipeline (.clk(clk), .rst(rst), .data_in(temp_accum_addr), .data_out(delay_accum_addr)); + +memory_block accum_mem (.clk(clk), .waddr(delay_accum_addr[8:0]), .wen(dpe_ovalid[0]), .wdata(dpe_results), .raddr(inst_accum_raddr), .rdata(accum_mem_rdata)); + +wire [8:0] rf_waddr; +assign rf_waddr = rx_tuser[8:0]; +wire [511:0] rf_wdata; +assign rf_wdata = rx_tdata; +wire [15:0] rf_wen; +assign rf_wen = rx_tuser[24:9]; +wire [511:0] rf_rdata_1, rf_rdata_2, rf_rdata_3, rf_rdata_4, rf_rdata_5, rf_rdata_6, rf_rdata_7, rf_rdata_8, + rf_rdata_9, rf_rdata_10, rf_rdata_11, rf_rdata_12, rf_rdata_13, rf_rdata_14, rf_rdata_15, rf_rdata_16; + +output wire [511:0] dpe_results; +output wire [15:0] dpe_ovalid; + +memory_block rf_01(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[0]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_1)); +memory_block rf_02(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[1]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_2)); +memory_block rf_03(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[2]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_3)); +memory_block rf_04(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[3]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_4)); +memory_block rf_05(.clk(clk), .waddr(rf_waddr), 
.wen(rf_wen[4]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_5)); +memory_block rf_06(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[5]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_6)); +memory_block rf_07(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[6]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_7)); +memory_block rf_08(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[7]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_8)); +memory_block rf_09(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[8]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_9)); +memory_block rf_10(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[9]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_10)); +memory_block rf_11(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[10]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_11)); +memory_block rf_12(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[11]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_12)); +memory_block rf_13(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[12]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_13)); +memory_block rf_14(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[13]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_14)); +memory_block rf_15(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[14]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_15)); +memory_block rf_16(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[15]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_16)); + +wire dpe_ivalid; +assign dpe_ivalid = inst_en && inst_release; +dpe dpe_01 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_1), .datac(reduction_fifo_odata[(32*1)-1:32*0]), .datad(accum_mem_rdata[(32*1)-1:32*0]), .result(dpe_results[(32*1)-1:32*0]), .ovalid(dpe_ovalid[0])); +dpe dpe_02 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), 
.datab(rf_rdata_2), .datac(reduction_fifo_odata[(32*2)-1:32*1]), .datad(accum_mem_rdata[(32*2)-1:32*1]), .result(dpe_results[(32*2)-1:32*1]), .ovalid(dpe_ovalid[1])); +dpe dpe_03 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_3), .datac(reduction_fifo_odata[(32*3)-1:32*2]), .datad(accum_mem_rdata[(32*3)-1:32*2]), .result(dpe_results[(32*3)-1:32*2]), .ovalid(dpe_ovalid[2])); +dpe dpe_04 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_4), .datac(reduction_fifo_odata[(32*4)-1:32*3]), .datad(accum_mem_rdata[(32*4)-1:32*3]), .result(dpe_results[(32*4)-1:32*3]), .ovalid(dpe_ovalid[3])); +dpe dpe_05 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_5), .datac(reduction_fifo_odata[(32*5)-1:32*4]), .datad(accum_mem_rdata[(32*5)-1:32*4]), .result(dpe_results[(32*5)-1:32*4]), .ovalid(dpe_ovalid[4])); +dpe dpe_06 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_6), .datac(reduction_fifo_odata[(32*6)-1:32*5]), .datad(accum_mem_rdata[(32*6)-1:32*5]), .result(dpe_results[(32*6)-1:32*5]), .ovalid(dpe_ovalid[5])); +dpe dpe_07 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_7), .datac(reduction_fifo_odata[(32*7)-1:32*6]), .datad(accum_mem_rdata[(32*7)-1:32*6]), .result(dpe_results[(32*7)-1:32*6]), .ovalid(dpe_ovalid[6])); +dpe dpe_08 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_8), .datac(reduction_fifo_odata[(32*8)-1:32*7]), .datad(accum_mem_rdata[(32*8)-1:32*7]), .result(dpe_results[(32*8)-1:32*7]), .ovalid(dpe_ovalid[7])); +dpe dpe_09 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), 
.accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_9), .datac(reduction_fifo_odata[(32*9)-1:32*8]), .datad(accum_mem_rdata[(32*9)-1:32*8]), .result(dpe_results[(32*9)-1:32*8]), .ovalid(dpe_ovalid[8])); +dpe dpe_10 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_10), .datac(reduction_fifo_odata[(32*10)-1:32*9]), .datad(accum_mem_rdata[(32*10)-1:32*9]), .result(dpe_results[(32*10)-1:32*9]), .ovalid(dpe_ovalid[9])); +dpe dpe_11 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_11), .datac(reduction_fifo_odata[(32*11)-1:32*10]), .datad(accum_mem_rdata[(32*11)-1:32*10]), .result(dpe_results[(32*11)-1:32*10]), .ovalid(dpe_ovalid[10])); +dpe dpe_12 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_12), .datac(reduction_fifo_odata[(32*12)-1:32*11]), .datad(accum_mem_rdata[(32*12)-1:32*11]), .result(dpe_results[(32*12)-1:32*11]), .ovalid(dpe_ovalid[11])); +dpe dpe_13 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_13), .datac(reduction_fifo_odata[(32*13)-1:32*12]), .datad(accum_mem_rdata[(32*13)-1:32*12]), .result(dpe_results[(32*13)-1:32*12]), .ovalid(dpe_ovalid[12])); +dpe dpe_14 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_14), .datac(reduction_fifo_odata[(32*14)-1:32*13]), .datad(accum_mem_rdata[(32*14)-1:32*13]), .result(dpe_results[(32*14)-1:32*13]), .ovalid(dpe_ovalid[13])); +dpe dpe_15 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_15), .datac(reduction_fifo_odata[(32*15)-1:32*14]), .datad(accum_mem_rdata[(32*15)-1:32*14]), 
.result(dpe_results[(32*15)-1:32*14]), .ovalid(dpe_ovalid[14])); +dpe dpe_16 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_16), .datac(reduction_fifo_odata[(32*16)-1:32*15]), .datad(accum_mem_rdata[(32*16)-1:32*15]), .result(dpe_results[(32*16)-1:32*15]), .ovalid(dpe_ovalid[15])); + +wire output_fifo_empty, output_fifo_full; +wire [511:0] output_fifo_odata; +wire output_fifo_pop; +assign output_fifo_pop = tx_tready && !output_fifo_empty; +fifo_mvm output_fifo (.clk(clk), .rst(rst), .push(dpe_ovalid[0]), .idata(dpe_results), .pop(output_fifo_pop), .odata(output_fifo_odata), .empty(output_fifo_empty), .full(output_fifo_full)); + +reg [ 63:0] r_tx_tstrb; +reg [ 63:0] r_tx_tkeep; +reg [ 7:0] r_tx_tid; +reg [ 7:0] r_tx_tdest; +reg [ 31:0] r_tx_tuser; +reg r_tx_tlast; +always @ (posedge clk) begin + if (rst) begin + r_tx_tstrb <= 0; + r_tx_tkeep <= 0; + r_tx_tid <= 0; + r_tx_tdest <= 0; + r_tx_tuser <= 0; + r_tx_tlast <= 0; + end else begin + r_tx_tstrb <= rx_tstrb; + r_tx_tkeep <= rx_tkeep; + r_tx_tid <= rx_tid; + r_tx_tdest <= rx_tdest; + r_tx_tuser <= rx_tuser; + r_tx_tlast <= rx_tlast; + end +end + +assign tx_tvalid = tx_tready && !output_fifo_empty; +assign tx_tdata = output_fifo_odata; +assign tx_tstrb = r_tx_tstrb; +assign tx_tkeep = r_tx_tkeep; +assign tx_tid = r_tx_tid; +assign tx_tdest = r_tx_tdest; +assign tx_tuser = r_tx_tuser; +assign tx_tlast = r_tx_tlast; +assign rx_tready = !input_fifo_full; + +endmodule + + +module dpe ( + clk, + rst, + ivalid, + accum, + reduce, + dataa, + datab, + datac, + datad, + result, + ovalid +); + +input wire clk; +input wire rst; +input wire ivalid; +input wire accum; +input wire reduce; +input wire [511:0] dataa; +input wire [511:0] datab; +input wire [ 31:0] datac; +input wire [ 31:0] datad; +output wire [ 31:0] result; +output wire ovalid; + +wire [36:0] chain_atom01_to_atom00, chain_atom02_to_atom01, chain_atom03_to_atom02, 
chain_atom04_to_atom03, + chain_atom05_to_atom04, chain_atom06_to_atom05, chain_atom07_to_atom06, chain_atom08_to_atom07, + chain_atom09_to_atom08, chain_atom10_to_atom09, chain_atom11_to_atom10, chain_atom12_to_atom11, + chain_atom13_to_atom12, chain_atom14_to_atom13, chain_atom15_to_atom14, dummy_chain; +wire [31:0] res15, res14, res13, res12, res11, res10, res09, res08, res07, res06, res05, res04, res03, res02, res01, res00; + +wire [33:0] temp_datac, temp_datad, delay_datac, delay_datad; +assign temp_datac = {ivalid, accum, datac}; +assign temp_datad = {ivalid, reduce, datad}; + +dpe_pipeline datac_pipe (.clk(clk), .rst(rst), .data_in(temp_datac), .data_out(delay_datac)); +dpe_pipeline datad_pipe (.clk(clk), .rst(rst), .data_in(temp_datad), .data_out(delay_datad)); + +dsp_inst d15(.clk(clk), .reset(rst), .ax(dataa[ (16*1)-1: 16*0]), .ay(datab[ (16*1)-1: 16*0]), .bx(dataa[ (16*2)-1: 16*1]), .by(datab[ (16*2)-1: 16*1]), .chainin( 37'd0), .result(res15), .chainout(chain_atom15_to_atom14)); +dsp_inst d14(.clk(clk), .reset(rst), .ax(dataa[ (16*3)-1: 16*2]), .ay(datab[ (16*3)-1: 16*2]), .bx(dataa[ (16*4)-1: 16*3]), .by(datab[ (16*4)-1: 16*3]), .chainin(chain_atom15_to_atom14), .result(res14), .chainout(chain_atom14_to_atom13)); +dsp_inst d13(.clk(clk), .reset(rst), .ax(dataa[ (16*5)-1: 16*4]), .ay(datab[ (16*5)-1: 16*4]), .bx(dataa[ (16*6)-1: 16*5]), .by(datab[ (16*6)-1: 16*5]), .chainin(chain_atom14_to_atom13), .result(res13), .chainout(chain_atom13_to_atom12)); +dsp_inst d12(.clk(clk), .reset(rst), .ax(dataa[ (16*7)-1: 16*6]), .ay(datab[ (16*7)-1: 16*6]), .bx(dataa[ (16*8)-1: 16*7]), .by(datab[ (16*8)-1: 16*7]), .chainin(chain_atom13_to_atom12), .result(res12), .chainout(chain_atom12_to_atom11)); +dsp_inst d11(.clk(clk), .reset(rst), .ax(dataa[ (16*9)-1: 16*8]), .ay(datab[ (16*9)-1: 16*8]), .bx(dataa[(16*10)-1: 16*9]), .by(datab[(16*10)-1: 16*9]), .chainin(chain_atom12_to_atom11), .result(res11), .chainout(chain_atom11_to_atom10)); +dsp_inst d10(.clk(clk), 
.reset(rst), .ax(dataa[(16*11)-1:16*10]), .ay(datab[(16*11)-1:16*10]), .bx(dataa[(16*12)-1:16*11]), .by(datab[(16*12)-1:16*11]), .chainin(chain_atom11_to_atom10), .result(res10), .chainout(chain_atom10_to_atom09)); +dsp_inst d09(.clk(clk), .reset(rst), .ax(dataa[(16*13)-1:16*12]), .ay(datab[(16*13)-1:16*12]), .bx(dataa[(16*14)-1:16*13]), .by(datab[(16*14)-1:16*13]), .chainin(chain_atom10_to_atom09), .result(res09), .chainout(chain_atom09_to_atom08)); +dsp_inst d08(.clk(clk), .reset(rst), .ax(dataa[(16*15)-1:16*14]), .ay(datab[(16*15)-1:16*14]), .bx(dataa[(16*16)-1:16*15]), .by(datab[(16*16)-1:16*15]), .chainin(chain_atom09_to_atom08), .result(res08), .chainout(chain_atom08_to_atom07)); +dsp_inst d07(.clk(clk), .reset(rst), .ax(dataa[(16*17)-1:16*16]), .ay(datab[(16*17)-1:16*16]), .bx(dataa[(16*18)-1:16*17]), .by(datab[(16*18)-1:16*17]), .chainin(chain_atom08_to_atom07), .result(res07), .chainout(chain_atom07_to_atom06)); +dsp_inst d06(.clk(clk), .reset(rst), .ax(dataa[(16*19)-1:16*18]), .ay(datab[(16*19)-1:16*18]), .bx(dataa[(16*20)-1:16*19]), .by(datab[(16*20)-1:16*19]), .chainin(chain_atom07_to_atom06), .result(res06), .chainout(chain_atom06_to_atom05)); +dsp_inst d05(.clk(clk), .reset(rst), .ax(dataa[(16*21)-1:16*20]), .ay(datab[(16*21)-1:16*20]), .bx(dataa[(16*22)-1:16*21]), .by(datab[(16*22)-1:16*21]), .chainin(chain_atom06_to_atom05), .result(res05), .chainout(chain_atom05_to_atom04)); +dsp_inst d04(.clk(clk), .reset(rst), .ax(dataa[(16*23)-1:16*22]), .ay(datab[(16*23)-1:16*22]), .bx(dataa[(16*24)-1:16*23]), .by(datab[(16*24)-1:16*23]), .chainin(chain_atom05_to_atom04), .result(res04), .chainout(chain_atom04_to_atom03)); +dsp_inst d03(.clk(clk), .reset(rst), .ax(dataa[(16*25)-1:16*24]), .ay(datab[(16*25)-1:16*24]), .bx(dataa[(16*26)-1:16*25]), .by(datab[(16*26)-1:16*25]), .chainin(chain_atom04_to_atom03), .result(res03), .chainout(chain_atom03_to_atom02)); +dsp_inst d02(.clk(clk), .reset(rst), .ax(dataa[(16*27)-1:16*26]), .ay(datab[(16*27)-1:16*26]), 
.bx(dataa[(16*28)-1:16*27]), .by(datab[(16*28)-1:16*27]), .chainin(chain_atom03_to_atom02), .result(res02), .chainout(chain_atom02_to_atom01)); +dsp_inst d01(.clk(clk), .reset(rst), .ax(dataa[(16*29)-1:16*28]), .ay(datab[(16*29)-1:16*28]), .bx(dataa[(16*30)-1:16*29]), .by(datab[(16*30)-1:16*29]), .chainin(chain_atom02_to_atom01), .result(res01), .chainout(chain_atom01_to_atom00)); +dsp_inst d00(.clk(clk), .reset(rst), .ax(dataa[(16*31)-1:16*30]), .ay(datab[(16*31)-1:16*30]), .bx(dataa[(16*32)-1:16*31]), .by(datab[(16*32)-1:16*31]), .chainin(chain_atom01_to_atom00), .result(res00), .chainout( dummy_chain)); + +reg [31:0] r_result; +reg r_ovalid; + +always @ (posedge clk) begin + if (rst) begin + r_result <= 0; + r_ovalid <= 1'b0; + end else begin + if (delay_datac[33]) begin + if (delay_datac[32] && delay_datad[32]) begin + r_result <= res00 + delay_datac[31:0] + delay_datad[31:0]; + end else if (delay_datac[32] && !delay_datad[32]) begin + r_result <= res00 + delay_datac[31:0]; + end else if (!delay_datac[32] && delay_datad[32]) begin + r_result <= res00 + delay_datad[31:0]; + end else begin + r_result <= res00; + end + end + r_ovalid <= delay_datac[33] && delay_datad[33]; + end +end + +assign result = r_result; +assign ovalid = r_ovalid; + +endmodule + +module memory_block ( + clk, + waddr, + wen, + wdata, + raddr, + rdata +); + +input wire clk; +input wire [ 8:0] waddr; +input wire wen; +input wire [511:0] wdata; +input wire [ 8:0] raddr; +output wire [511:0] rdata; + +bram_inst b0 (.clk(clk), .wen(wen), .wdata(wdata[(32*1)-1:32*0]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(wen), .wdata(wdata[(32*2)-1:32*1]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(wen), .wdata(wdata[(32*3)-1:32*2]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(wen), .wdata(wdata[(32*4)-1:32*3]), .waddr(waddr), .raddr(raddr), 
.rdata(rdata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(wen), .wdata(wdata[(32*5)-1:32*4]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(wen), .wdata(wdata[(32*6)-1:32*5]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(wen), .wdata(wdata[(32*7)-1:32*6]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(wen), .wdata(wdata[(32*8)-1:32*7]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*8)-1:32*7])); +bram_inst b8 (.clk(clk), .wen(wen), .wdata(wdata[(32*9)-1:32*8]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(wen), .wdata(wdata[(32*10)-1:32*9]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*10)-1:32*9])); +bram_inst b10 (.clk(clk), .wen(wen), .wdata(wdata[(32*11)-1:32*10]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(wen), .wdata(wdata[(32*12)-1:32*11]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(wen), .wdata(wdata[(32*13)-1:32*12]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(wen), .wdata(wdata[(32*14)-1:32*13]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(wen), .wdata(wdata[(32*15)-1:32*14]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), .wen(wen), .wdata(wdata[(32*16)-1:32*15]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*16)-1:32*15])); + +endmodule + +module fifo_mvm ( + clk, + rst, + push, + idata, + pop, + odata, + empty, + full +); + +input wire clk; +input wire rst; +input wire push; +input wire [511:0] idata; +input wire pop; +output wire [511:0] odata; +output reg empty; +output reg full; + +reg [8:0] head_ptr; +reg [8:0] tail_ptr; + +bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr), 
.raddr(head_ptr), .rdata(odata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7])); +bram_inst b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9])); +bram_inst b10 (.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), 
.wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15])); + +always @ (posedge clk) begin /* pointer update: tail_ptr advances on push, head_ptr on pop, both cleared by rst */ + if (rst) begin + head_ptr <= 0; + tail_ptr <= 0; + end else begin + if (push) tail_ptr <= tail_ptr + 1; + if (pop) head_ptr <= head_ptr + 1; + end +end + +always @ (*) begin /* combinational status flags derived from the two pointers */ + if (tail_ptr == head_ptr) begin + empty = 1'b1; + end else begin + empty = 1'b0; + end + + if (tail_ptr + 9'd1 == head_ptr) begin /* sized 9'd1 keeps the add at 9 bits so it wraps 511 to 0; an unsized 1 widens the compare to 32 bits and full is never raised at the wrap point */ + full = 1'b1; + end else begin + full = 1'b0; + end +end + +endmodule + + +module bram_inst( + clk, + wen, + wdata, + waddr, + raddr, + rdata +); + +input wire clk; +input wire wen; +input wire [31:0] wdata; +input wire [ 8:0] waddr; +input wire [ 8:0] raddr; +output wire [31:0] rdata; + +wire [39:0] rtemp; +wire [39:0] wtemp; +wire [8:0] addrtemp; +assign rdata = rtemp[31:0]; +assign wtemp = {8'd0, wdata}; +assign addrtemp = waddr | raddr; /* NOTE(review): write and read addresses are OR-ed into the single address input of single_port_ram; presumably intentional for this benchmark wrapper - confirm */ + +single_port_ram bram_instance( + .clk(clk), + .we(wen), + .data(wtemp), + .addr(addrtemp), + .out(rtemp) +); + +endmodule + + +module dpe_pipeline ( + clk, + rst, + data_in, + data_out +); + +input wire clk; +input wire rst; +input wire [33:0] data_in; +output wire [33:0] data_out; + +reg [33:0] r_pipeline_00; +reg [33:0] r_pipeline_01; +reg [33:0] r_pipeline_02; +reg [33:0] r_pipeline_03; +reg [33:0] r_pipeline_04; +reg [33:0] r_pipeline_05; +reg [33:0] r_pipeline_06; +reg [33:0] r_pipeline_07; +reg [33:0] r_pipeline_08; +reg [33:0] r_pipeline_09; +reg [33:0] r_pipeline_10; +reg [33:0] r_pipeline_11; +reg [33:0] r_pipeline_12; +reg [33:0] r_pipeline_13; +reg [33:0] r_pipeline_14; +reg [33:0] r_pipeline_15; +reg [33:0] r_pipeline_16; +reg [33:0] r_pipeline_17; +reg [33:0] r_pipeline_18; +reg [33:0] r_pipeline_19; +reg [33:0] r_pipeline_20; +reg [33:0] r_pipeline_21; +reg [33:0] r_pipeline_22; +reg [33:0] r_pipeline_23; +reg [33:0] r_pipeline_24; +reg [33:0] r_pipeline_25; +reg [33:0] r_pipeline_26; +reg [33:0] r_pipeline_27; +reg [33:0] r_pipeline_28; +reg [33:0] r_pipeline_29; +reg
[33:0] r_pipeline_30; +reg [33:0] r_pipeline_31; + +always @ (posedge clk) begin + if (rst) begin + r_pipeline_00 <= 0; + r_pipeline_01 <= 0; + r_pipeline_02 <= 0; + r_pipeline_03 <= 0; + r_pipeline_04 <= 0; + r_pipeline_05 <= 0; + r_pipeline_06 <= 0; + r_pipeline_07 <= 0; + r_pipeline_08 <= 0; + r_pipeline_09 <= 0; + r_pipeline_10 <= 0; + r_pipeline_11 <= 0; + r_pipeline_12 <= 0; + r_pipeline_13 <= 0; + r_pipeline_14 <= 0; + r_pipeline_15 <= 0; + r_pipeline_16 <= 0; + r_pipeline_17 <= 0; + r_pipeline_18 <= 0; + r_pipeline_19 <= 0; + r_pipeline_20 <= 0; + r_pipeline_21 <= 0; + r_pipeline_22 <= 0; + r_pipeline_23 <= 0; + r_pipeline_24 <= 0; + r_pipeline_25 <= 0; + r_pipeline_26 <= 0; + r_pipeline_27 <= 0; + r_pipeline_28 <= 0; + r_pipeline_29 <= 0; + r_pipeline_30 <= 0; + r_pipeline_31 <= 0; + end else begin + r_pipeline_00 <= r_pipeline_01; + r_pipeline_01 <= r_pipeline_02; + r_pipeline_02 <= r_pipeline_03; + r_pipeline_03 <= r_pipeline_04; + r_pipeline_04 <= r_pipeline_05; + r_pipeline_05 <= r_pipeline_06; + r_pipeline_06 <= r_pipeline_07; + r_pipeline_07 <= r_pipeline_08; + r_pipeline_08 <= r_pipeline_09; + r_pipeline_09 <= r_pipeline_10; + r_pipeline_10 <= r_pipeline_11; + r_pipeline_11 <= r_pipeline_12; + r_pipeline_12 <= r_pipeline_13; + r_pipeline_13 <= r_pipeline_14; + r_pipeline_14 <= r_pipeline_15; + r_pipeline_15 <= r_pipeline_16; + r_pipeline_16 <= r_pipeline_17; + r_pipeline_17 <= r_pipeline_18; + r_pipeline_18 <= r_pipeline_19; + r_pipeline_19 <= r_pipeline_20; + r_pipeline_20 <= r_pipeline_21; + r_pipeline_21 <= r_pipeline_22; + r_pipeline_22 <= r_pipeline_23; + r_pipeline_23 <= r_pipeline_24; + r_pipeline_24 <= r_pipeline_25; + r_pipeline_25 <= r_pipeline_26; + r_pipeline_26 <= r_pipeline_27; + r_pipeline_27 <= r_pipeline_28; + r_pipeline_28 <= r_pipeline_29; + r_pipeline_29 <= r_pipeline_30; + r_pipeline_30 <= r_pipeline_31; + r_pipeline_31 <= data_in; + end +end + +assign data_out = r_pipeline_00; + +endmodule + + +module dsp_inst( + clk, + reset, 
+ ax, + ay, + bx, + by, + chainin, + result, + chainout +); + +input wire clk; +input wire reset; +input wire [15:0] ax; +input wire [15:0] ay; +input wire [15:0] bx; +input wire [15:0] by; +input wire [36:0] chainin; +output wire [31:0] result; +output wire [36:0] chainout; + +wire [18:0] tmp_ax; +wire [19:0] tmp_ay; +wire [18:0] tmp_bx; +wire [19:0] tmp_by; +wire [36:0] tmp_result; + +assign tmp_ax = {2'b0, ax}; +assign tmp_ay = {3'b0, ay}; +assign tmp_bx = {2'b0, bx}; +assign tmp_by = {3'b0, by}; +assign result = tmp_result[31:0]; + +int_sop_2 dsp_instance( + .clk(clk), + .reset(reset), + .ax(tmp_ax), + .ay(tmp_ay), + .bx(tmp_bx), + .by(tmp_by), + .chainin(chainin), + .result(tmp_result), + .chainout(chainout) +); + +endmodule + +module noc_router_adapter_block_inst( /* wrapper that forwards every port, name for name, to one noc_router_adapter_block instance */ + clk, + reset, + master_tready, + master_tdata, /* synthesis preserve */ + master_tvalid, /* synthesis preserve */ + master_tstrb, /* synthesis preserve */ + master_tkeep, /* synthesis preserve */ + master_tid, /* synthesis preserve */ + master_tdest, /* synthesis preserve */ + master_tuser, /* synthesis preserve */ + master_tlast, /* synthesis preserve */ + slave_tvalid, + slave_tready, /* synthesis preserve */ + slave_tdata, + slave_tstrb, + slave_tkeep, + slave_tid, + slave_tdest, + slave_tuser, + slave_tlast +); + +parameter noc_dw = 32; //NoC Data Width +parameter byte_dw = 8; + +/*control signal*/ +input wire clk; +input wire reset; + +/*Master*/ /* outputs are nets: nothing in this module assigns them procedurally, they are connected only to noc_instance */ +input wire master_tready; +output wire master_tvalid; +output wire [noc_dw - 1 : 0] master_tdata; +output wire [noc_dw / 8 - 1 : 0] master_tstrb; +output wire [noc_dw / 8 - 1 : 0] master_tkeep; +output wire [byte_dw - 1 : 0] master_tid; +output wire [byte_dw - 1 : 0] master_tdest; +output wire [byte_dw - 1 : 0] master_tuser; +output wire master_tlast; + +/*Slave*/ +input wire slave_tvalid; +input wire [noc_dw - 1 : 0] slave_tdata; +input wire [noc_dw / 8 - 1 : 0] slave_tstrb; +input wire [noc_dw / 8 - 1 : 0] slave_tkeep; +input wire [byte_dw - 1 : 0]
slave_tid; +input wire [byte_dw - 1 : 0] slave_tdest; +input wire [byte_dw - 1 : 0] slave_tuser; +input wire slave_tlast; +output wire slave_tready; + +noc_router_adapter_block noc_instance( + .clk(clk), + .reset(reset), + .master_tready(master_tready), + .master_tdata(master_tdata), + .master_tvalid(master_tvalid), + .master_tstrb(master_tstrb), + .master_tkeep(master_tkeep), + .master_tid(master_tid), + .master_tdest(master_tdest), + .master_tuser(master_tuser), + .master_tlast(master_tlast), + .slave_tvalid(slave_tvalid), + .slave_tready(slave_tready), + .slave_tdata(slave_tdata), + .slave_tstrb(slave_tstrb), + .slave_tkeep(slave_tkeep), + .slave_tid(slave_tid), + .slave_tdest(slave_tdest), + .slave_tuser(slave_tuser), + .slave_tlast(slave_tlast) +); + + +endmodule \ No newline at end of file diff --git a/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp_1.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp_1.v index 6471e86ce95..06915daaf8c 100644 --- a/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp_1.v +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/MLP_1/verilog/mlp_1.v @@ -11,6 +11,8 @@ modules to the collector module.
*/ +/* This file contains only module instantiations; for each module definition, see the "shared_verilog" folder */ + module mlp_1 ( clk, reset, diff --git a/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/hard_block_include.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/hard_block_include.v new file mode 100644 index 00000000000..6d8d25b258f --- /dev/null +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/hard_block_include.v @@ -0,0 +1,3 @@ +`define dsp_top +`define noc_router_adapter_block +`define memory diff --git a/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_collector.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_collector.v new file mode 100644 index 00000000000..bf821199b60 --- /dev/null +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_collector.v @@ -0,0 +1,221 @@ +/* Simplified collector used for FPT'23 */ + +module collector ( + clk, + rst, + rx_tvalid, + rx_tdata, + rx_tstrb, + rx_tkeep, + rx_tid, + rx_tdest, + rx_tuser, + rx_tlast, + rx_tready, + ofifo_rdata, + ofifo_ren, + ofifo_rdy +); + +input clk; +input rst; +// Rx interface +input rx_tvalid; +input [511:0] rx_tdata; +input [63:0] rx_tstrb; +input [63:0] rx_tkeep; +input [7:0] rx_tid; +input [7:0] rx_tdest; +input [31:0] rx_tuser; +input rx_tlast; +output rx_tready; +// External FIFO IO +output [63:0] ofifo_rdata; +input ofifo_ren; +output ofifo_rdy; + +wire fifo_push; +assign fifo_push = rx_tvalid && rx_tready; +wire fifo_full_signal, fifo_empty_signal; +wire [511:0] fifo_rdata; +assign ofifo_rdata[0] = fifo_rdata[0] ^ fifo_rdata[1] ^ fifo_rdata[2] ^ fifo_rdata[3] ^ fifo_rdata[4] ^ fifo_rdata[5] ^ fifo_rdata[6] ^ fifo_rdata[7]; +assign ofifo_rdata[1] = fifo_rdata[8] ^ fifo_rdata[9] ^ fifo_rdata[10] ^ fifo_rdata[11] ^ fifo_rdata[12] ^ fifo_rdata[13] ^ fifo_rdata[14] ^ fifo_rdata[15]; +assign ofifo_rdata[2] = fifo_rdata[16] ^ fifo_rdata[17] ^ fifo_rdata[18] ^ fifo_rdata[19] ^ fifo_rdata[20] ^ fifo_rdata[21]
^ fifo_rdata[22] ^ fifo_rdata[23]; +assign ofifo_rdata[3] = fifo_rdata[24] ^ fifo_rdata[25] ^ fifo_rdata[26] ^ fifo_rdata[27] ^ fifo_rdata[28] ^ fifo_rdata[29] ^ fifo_rdata[30] ^ fifo_rdata[31]; +assign ofifo_rdata[4] = fifo_rdata[32] ^ fifo_rdata[33] ^ fifo_rdata[34] ^ fifo_rdata[35] ^ fifo_rdata[36] ^ fifo_rdata[37] ^ fifo_rdata[38] ^ fifo_rdata[39]; +assign ofifo_rdata[5] = fifo_rdata[40] ^ fifo_rdata[41] ^ fifo_rdata[42] ^ fifo_rdata[43] ^ fifo_rdata[44] ^ fifo_rdata[45] ^ fifo_rdata[46] ^ fifo_rdata[47]; +assign ofifo_rdata[6] = fifo_rdata[48] ^ fifo_rdata[49] ^ fifo_rdata[50] ^ fifo_rdata[51] ^ fifo_rdata[52] ^ fifo_rdata[53] ^ fifo_rdata[54] ^ fifo_rdata[55]; +assign ofifo_rdata[7] = fifo_rdata[56] ^ fifo_rdata[57] ^ fifo_rdata[58] ^ fifo_rdata[59] ^ fifo_rdata[60] ^ fifo_rdata[61] ^ fifo_rdata[62] ^ fifo_rdata[63]; +assign ofifo_rdata[8] = fifo_rdata[64] ^ fifo_rdata[65] ^ fifo_rdata[66] ^ fifo_rdata[67] ^ fifo_rdata[68] ^ fifo_rdata[69] ^ fifo_rdata[70] ^ fifo_rdata[71]; +assign ofifo_rdata[9] = fifo_rdata[72] ^ fifo_rdata[73] ^ fifo_rdata[74] ^ fifo_rdata[75] ^ fifo_rdata[76] ^ fifo_rdata[77] ^ fifo_rdata[78] ^ fifo_rdata[79]; +assign ofifo_rdata[10] = fifo_rdata[80] ^ fifo_rdata[81] ^ fifo_rdata[82] ^ fifo_rdata[83] ^ fifo_rdata[84] ^ fifo_rdata[85] ^ fifo_rdata[86] ^ fifo_rdata[87]; +assign ofifo_rdata[11] = fifo_rdata[88] ^ fifo_rdata[89] ^ fifo_rdata[90] ^ fifo_rdata[91] ^ fifo_rdata[92] ^ fifo_rdata[93] ^ fifo_rdata[94] ^ fifo_rdata[95]; +assign ofifo_rdata[12] = fifo_rdata[96] ^ fifo_rdata[97] ^ fifo_rdata[98] ^ fifo_rdata[99] ^ fifo_rdata[100] ^ fifo_rdata[101] ^ fifo_rdata[102] ^ fifo_rdata[103]; +assign ofifo_rdata[13] = fifo_rdata[104] ^ fifo_rdata[105] ^ fifo_rdata[106] ^ fifo_rdata[107] ^ fifo_rdata[108] ^ fifo_rdata[109] ^ fifo_rdata[110] ^ fifo_rdata[111]; +assign ofifo_rdata[14] = fifo_rdata[112] ^ fifo_rdata[113] ^ fifo_rdata[114] ^ fifo_rdata[115] ^ fifo_rdata[116] ^ fifo_rdata[117] ^ fifo_rdata[118] ^ fifo_rdata[119]; +assign 
ofifo_rdata[15] = fifo_rdata[120] ^ fifo_rdata[121] ^ fifo_rdata[122] ^ fifo_rdata[123] ^ fifo_rdata[124] ^ fifo_rdata[125] ^ fifo_rdata[126] ^ fifo_rdata[127]; +assign ofifo_rdata[16] = fifo_rdata[128] ^ fifo_rdata[129] ^ fifo_rdata[130] ^ fifo_rdata[131] ^ fifo_rdata[132] ^ fifo_rdata[133] ^ fifo_rdata[134] ^ fifo_rdata[135]; +assign ofifo_rdata[17] = fifo_rdata[136] ^ fifo_rdata[137] ^ fifo_rdata[138] ^ fifo_rdata[139] ^ fifo_rdata[140] ^ fifo_rdata[141] ^ fifo_rdata[142] ^ fifo_rdata[143]; +assign ofifo_rdata[18] = fifo_rdata[144] ^ fifo_rdata[145] ^ fifo_rdata[146] ^ fifo_rdata[147] ^ fifo_rdata[148] ^ fifo_rdata[149] ^ fifo_rdata[150] ^ fifo_rdata[151]; +assign ofifo_rdata[19] = fifo_rdata[152] ^ fifo_rdata[153] ^ fifo_rdata[154] ^ fifo_rdata[155] ^ fifo_rdata[156] ^ fifo_rdata[157] ^ fifo_rdata[158] ^ fifo_rdata[159]; +assign ofifo_rdata[20] = fifo_rdata[160] ^ fifo_rdata[161] ^ fifo_rdata[162] ^ fifo_rdata[163] ^ fifo_rdata[164] ^ fifo_rdata[165] ^ fifo_rdata[166] ^ fifo_rdata[167]; +assign ofifo_rdata[21] = fifo_rdata[168] ^ fifo_rdata[169] ^ fifo_rdata[170] ^ fifo_rdata[171] ^ fifo_rdata[172] ^ fifo_rdata[173] ^ fifo_rdata[174] ^ fifo_rdata[175]; +assign ofifo_rdata[22] = fifo_rdata[176] ^ fifo_rdata[177] ^ fifo_rdata[178] ^ fifo_rdata[179] ^ fifo_rdata[180] ^ fifo_rdata[181] ^ fifo_rdata[182] ^ fifo_rdata[183]; +assign ofifo_rdata[23] = fifo_rdata[184] ^ fifo_rdata[185] ^ fifo_rdata[186] ^ fifo_rdata[187] ^ fifo_rdata[188] ^ fifo_rdata[189] ^ fifo_rdata[190] ^ fifo_rdata[191]; +assign ofifo_rdata[24] = fifo_rdata[192] ^ fifo_rdata[193] ^ fifo_rdata[194] ^ fifo_rdata[195] ^ fifo_rdata[196] ^ fifo_rdata[197] ^ fifo_rdata[198] ^ fifo_rdata[199]; +assign ofifo_rdata[25] = fifo_rdata[200] ^ fifo_rdata[201] ^ fifo_rdata[202] ^ fifo_rdata[203] ^ fifo_rdata[204] ^ fifo_rdata[205] ^ fifo_rdata[206] ^ fifo_rdata[207]; +assign ofifo_rdata[26] = fifo_rdata[208] ^ fifo_rdata[209] ^ fifo_rdata[210] ^ fifo_rdata[211] ^ fifo_rdata[212] ^ fifo_rdata[213] ^ 
fifo_rdata[214] ^ fifo_rdata[215]; +assign ofifo_rdata[27] = fifo_rdata[216] ^ fifo_rdata[217] ^ fifo_rdata[218] ^ fifo_rdata[219] ^ fifo_rdata[220] ^ fifo_rdata[221] ^ fifo_rdata[222] ^ fifo_rdata[223]; +assign ofifo_rdata[28] = fifo_rdata[224] ^ fifo_rdata[225] ^ fifo_rdata[226] ^ fifo_rdata[227] ^ fifo_rdata[228] ^ fifo_rdata[229] ^ fifo_rdata[230] ^ fifo_rdata[231]; +assign ofifo_rdata[29] = fifo_rdata[232] ^ fifo_rdata[233] ^ fifo_rdata[234] ^ fifo_rdata[235] ^ fifo_rdata[236] ^ fifo_rdata[237] ^ fifo_rdata[238] ^ fifo_rdata[239]; +assign ofifo_rdata[30] = fifo_rdata[240] ^ fifo_rdata[241] ^ fifo_rdata[242] ^ fifo_rdata[243] ^ fifo_rdata[244] ^ fifo_rdata[245] ^ fifo_rdata[246] ^ fifo_rdata[247]; +assign ofifo_rdata[31] = fifo_rdata[248] ^ fifo_rdata[249] ^ fifo_rdata[250] ^ fifo_rdata[251] ^ fifo_rdata[252] ^ fifo_rdata[253] ^ fifo_rdata[254] ^ fifo_rdata[255]; +assign ofifo_rdata[32] = fifo_rdata[256] ^ fifo_rdata[257] ^ fifo_rdata[258] ^ fifo_rdata[259] ^ fifo_rdata[260] ^ fifo_rdata[261] ^ fifo_rdata[262] ^ fifo_rdata[263]; +assign ofifo_rdata[33] = fifo_rdata[264] ^ fifo_rdata[265] ^ fifo_rdata[266] ^ fifo_rdata[267] ^ fifo_rdata[268] ^ fifo_rdata[269] ^ fifo_rdata[270] ^ fifo_rdata[271]; +assign ofifo_rdata[34] = fifo_rdata[272] ^ fifo_rdata[273] ^ fifo_rdata[274] ^ fifo_rdata[275] ^ fifo_rdata[276] ^ fifo_rdata[277] ^ fifo_rdata[278] ^ fifo_rdata[279]; +assign ofifo_rdata[35] = fifo_rdata[280] ^ fifo_rdata[281] ^ fifo_rdata[282] ^ fifo_rdata[283] ^ fifo_rdata[284] ^ fifo_rdata[285] ^ fifo_rdata[286] ^ fifo_rdata[287]; +assign ofifo_rdata[36] = fifo_rdata[288] ^ fifo_rdata[289] ^ fifo_rdata[290] ^ fifo_rdata[291] ^ fifo_rdata[292] ^ fifo_rdata[293] ^ fifo_rdata[294] ^ fifo_rdata[295]; +assign ofifo_rdata[37] = fifo_rdata[296] ^ fifo_rdata[297] ^ fifo_rdata[298] ^ fifo_rdata[299] ^ fifo_rdata[300] ^ fifo_rdata[301] ^ fifo_rdata[302] ^ fifo_rdata[303]; +assign ofifo_rdata[38] = fifo_rdata[304] ^ fifo_rdata[305] ^ fifo_rdata[306] ^ fifo_rdata[307] ^ 
fifo_rdata[308] ^ fifo_rdata[309] ^ fifo_rdata[310] ^ fifo_rdata[311]; +assign ofifo_rdata[39] = fifo_rdata[312] ^ fifo_rdata[313] ^ fifo_rdata[314] ^ fifo_rdata[315] ^ fifo_rdata[316] ^ fifo_rdata[317] ^ fifo_rdata[318] ^ fifo_rdata[319]; +assign ofifo_rdata[40] = fifo_rdata[320] ^ fifo_rdata[321] ^ fifo_rdata[322] ^ fifo_rdata[323] ^ fifo_rdata[324] ^ fifo_rdata[325] ^ fifo_rdata[326] ^ fifo_rdata[327]; +assign ofifo_rdata[41] = fifo_rdata[328] ^ fifo_rdata[329] ^ fifo_rdata[330] ^ fifo_rdata[331] ^ fifo_rdata[332] ^ fifo_rdata[333] ^ fifo_rdata[334] ^ fifo_rdata[335]; +assign ofifo_rdata[42] = fifo_rdata[336] ^ fifo_rdata[337] ^ fifo_rdata[338] ^ fifo_rdata[339] ^ fifo_rdata[340] ^ fifo_rdata[341] ^ fifo_rdata[342] ^ fifo_rdata[343]; +assign ofifo_rdata[43] = fifo_rdata[344] ^ fifo_rdata[345] ^ fifo_rdata[346] ^ fifo_rdata[347] ^ fifo_rdata[348] ^ fifo_rdata[349] ^ fifo_rdata[350] ^ fifo_rdata[351]; +assign ofifo_rdata[44] = fifo_rdata[352] ^ fifo_rdata[353] ^ fifo_rdata[354] ^ fifo_rdata[355] ^ fifo_rdata[356] ^ fifo_rdata[357] ^ fifo_rdata[358] ^ fifo_rdata[359]; +assign ofifo_rdata[45] = fifo_rdata[360] ^ fifo_rdata[361] ^ fifo_rdata[362] ^ fifo_rdata[363] ^ fifo_rdata[364] ^ fifo_rdata[365] ^ fifo_rdata[366] ^ fifo_rdata[367]; +assign ofifo_rdata[46] = fifo_rdata[368] ^ fifo_rdata[369] ^ fifo_rdata[370] ^ fifo_rdata[371] ^ fifo_rdata[372] ^ fifo_rdata[373] ^ fifo_rdata[374] ^ fifo_rdata[375]; +assign ofifo_rdata[47] = fifo_rdata[376] ^ fifo_rdata[377] ^ fifo_rdata[378] ^ fifo_rdata[379] ^ fifo_rdata[380] ^ fifo_rdata[381] ^ fifo_rdata[382] ^ fifo_rdata[383]; +assign ofifo_rdata[48] = fifo_rdata[384] ^ fifo_rdata[385] ^ fifo_rdata[386] ^ fifo_rdata[387] ^ fifo_rdata[388] ^ fifo_rdata[389] ^ fifo_rdata[390] ^ fifo_rdata[391]; +assign ofifo_rdata[49] = fifo_rdata[392] ^ fifo_rdata[393] ^ fifo_rdata[394] ^ fifo_rdata[395] ^ fifo_rdata[396] ^ fifo_rdata[397] ^ fifo_rdata[398] ^ fifo_rdata[399]; +assign ofifo_rdata[50] = fifo_rdata[400] ^ fifo_rdata[401] ^ 
fifo_rdata[402] ^ fifo_rdata[403] ^ fifo_rdata[404] ^ fifo_rdata[405] ^ fifo_rdata[406] ^ fifo_rdata[407]; +assign ofifo_rdata[51] = fifo_rdata[408] ^ fifo_rdata[409] ^ fifo_rdata[410] ^ fifo_rdata[411] ^ fifo_rdata[412] ^ fifo_rdata[413] ^ fifo_rdata[414] ^ fifo_rdata[415]; +assign ofifo_rdata[52] = fifo_rdata[416] ^ fifo_rdata[417] ^ fifo_rdata[418] ^ fifo_rdata[419] ^ fifo_rdata[420] ^ fifo_rdata[421] ^ fifo_rdata[422] ^ fifo_rdata[423]; +assign ofifo_rdata[53] = fifo_rdata[424] ^ fifo_rdata[425] ^ fifo_rdata[426] ^ fifo_rdata[427] ^ fifo_rdata[428] ^ fifo_rdata[429] ^ fifo_rdata[430] ^ fifo_rdata[431]; +assign ofifo_rdata[54] = fifo_rdata[432] ^ fifo_rdata[433] ^ fifo_rdata[434] ^ fifo_rdata[435] ^ fifo_rdata[436] ^ fifo_rdata[437] ^ fifo_rdata[438] ^ fifo_rdata[439]; +assign ofifo_rdata[55] = fifo_rdata[440] ^ fifo_rdata[441] ^ fifo_rdata[442] ^ fifo_rdata[443] ^ fifo_rdata[444] ^ fifo_rdata[445] ^ fifo_rdata[446] ^ fifo_rdata[447]; +assign ofifo_rdata[56] = fifo_rdata[448] ^ fifo_rdata[449] ^ fifo_rdata[450] ^ fifo_rdata[451] ^ fifo_rdata[452] ^ fifo_rdata[453] ^ fifo_rdata[454] ^ fifo_rdata[455]; +assign ofifo_rdata[57] = fifo_rdata[456] ^ fifo_rdata[457] ^ fifo_rdata[458] ^ fifo_rdata[459] ^ fifo_rdata[460] ^ fifo_rdata[461] ^ fifo_rdata[462] ^ fifo_rdata[463]; +assign ofifo_rdata[58] = fifo_rdata[464] ^ fifo_rdata[465] ^ fifo_rdata[466] ^ fifo_rdata[467] ^ fifo_rdata[468] ^ fifo_rdata[469] ^ fifo_rdata[470] ^ fifo_rdata[471]; +assign ofifo_rdata[59] = fifo_rdata[472] ^ fifo_rdata[473] ^ fifo_rdata[474] ^ fifo_rdata[475] ^ fifo_rdata[476] ^ fifo_rdata[477] ^ fifo_rdata[478] ^ fifo_rdata[479]; +assign ofifo_rdata[60] = fifo_rdata[480] ^ fifo_rdata[481] ^ fifo_rdata[482] ^ fifo_rdata[483] ^ fifo_rdata[484] ^ fifo_rdata[485] ^ fifo_rdata[486] ^ fifo_rdata[487]; +assign ofifo_rdata[61] = fifo_rdata[488] ^ fifo_rdata[489] ^ fifo_rdata[490] ^ fifo_rdata[491] ^ fifo_rdata[492] ^ fifo_rdata[493] ^ fifo_rdata[494] ^ fifo_rdata[495]; +assign ofifo_rdata[62] = 
fifo_rdata[496] ^ fifo_rdata[497] ^ fifo_rdata[498] ^ fifo_rdata[499] ^ fifo_rdata[500] ^ fifo_rdata[501] ^ fifo_rdata[502] ^ fifo_rdata[503]; +assign ofifo_rdata[63] = fifo_rdata[504] ^ fifo_rdata[505] ^ fifo_rdata[506] ^ fifo_rdata[507] ^ fifo_rdata[508] ^ fifo_rdata[509] ^ fifo_rdata[510] ^ fifo_rdata[511]; + + +fifo_collector fifo_inst ( + .clk(clk), + .rst(rst), + .push(fifo_push), + .idata(rx_tdata), + .pop(ofifo_ren), + .odata(fifo_rdata), + .empty(fifo_empty_signal), + .full(fifo_full_signal) +); + +assign rx_tready = !fifo_empty_signal; +assign ofifo_rdy = !fifo_full_signal; + +endmodule +/* fifo_collector: 512-bit-wide FIFO shell built from sixteen 32-bit bram_inst slices and two 9-bit pointers. Ports: clk; rst (synchronous, clears both pointers); push (slice write enable, advances tail_ptr); idata (512-bit write word); pop (advances head_ptr); odata (512-bit read word); empty (tail_ptr == head_ptr); full (tail_ptr+1 == head_ptr). push and pop are not gated by full or empty inside this module. */ +module fifo_collector ( + clk, + rst, + push, + idata, + pop, + odata, + empty, + full +); + +input wire clk; +input wire rst; +input wire push; +input wire [511:0] idata; +input wire pop; +output wire [511:0] odata; +output reg empty; +output reg full; +/* head_ptr is the read address and tail_ptr the write address of every slice. */ +reg [8:0] head_ptr; +reg [8:0] tail_ptr; +/* Slice bN carries the N-th 32-bit lane of idata and odata (b0 = bits 31:0). */ +bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7])); +bram_inst
b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9])); +bram_inst b10 (.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), .wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15])); +/* Pointer update: synchronous reset to 0; otherwise push and pop each advance their own pointer by one, independently of each other. */ +always @ (posedge clk) begin + if (rst) begin + head_ptr <= 0; + tail_ptr <= 0; + end else begin + if (push) tail_ptr <= tail_ptr + 1; + if (pop) head_ptr <= head_ptr + 1; + end +end +/* Status flags are purely combinational functions of the two pointers. */ +always @ (*) begin + if (tail_ptr == head_ptr) begin + empty = 1'b1; + end else begin + empty = 1'b0; + end +/* NOTE(review): the unsized literal makes tail_ptr+1 evaluate wider than 9 bits, so when tail_ptr is 511 the sum is 512 rather than 0 and full is not raised for head_ptr == 0 - confirm whether wrap-around was intended. */ + if (tail_ptr+1 == head_ptr) begin + full = 1'b1; + end else begin + full = 1'b0; + end +end + +endmodule + +/* bram_inst: 32-bit data, 9-bit address memory slice wrapping one single_port_ram instance (single_port_ram is defined outside the visible source). Ports: clk; wen (write enable); wdata (32-bit write data); waddr and raddr (9-bit addresses); rdata (32-bit read data). */ +module bram_inst( + clk, + wen, + wdata, + waddr, + raddr, + rdata +); + +input wire clk; +input wire wen; +input wire [31:0] wdata; +input wire [ 8:0] waddr; +input wire [ 8:0] raddr; +output wire [31:0] rdata; +/* The RAM data path is 40 bits: wdata is zero-padded with 8 upper bits going in, and only bits 31:0 of the RAM output are returned. */ +wire [39:0] rtemp; +wire [39:0] wtemp; +wire [8:0] addrtemp; +assign rdata = rtemp[31:0]; +assign wtemp = {8'd0, wdata}; +assign addrtemp = waddr | raddr; +/* A single address reaches the RAM: the bitwise OR of waddr and raddr. NOTE(review): presumably a deliberate simplification for this benchmark rather than a true separate-read/write-address memory - confirm. */ +single_port_ram bram_instance( + .clk(clk), + .we(wen), + .data(wtemp), + .addr(addrtemp), + .out(rtemp) +); +
+endmodule \ No newline at end of file diff --git a/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_dispatcher.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_dispatcher.v new file mode 100644 index 00000000000..6939cf80c50 --- /dev/null +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_dispatcher.v @@ -0,0 +1,196 @@ +/* Simplified dispatcher used for FPT'23 */ +/* dispatcher: turns words written on the external FIFO port (ififo_wdata, ififo_wen, ififo_rdy) into a 512-bit Tx stream (tx_*). Each 64-bit ififo_wdata word is replicated eight times to form the 512-bit FIFO write word; tx_tdata is the FIFO read word, tx_tvalid is the inverse of the FIFO empty flag and ififo_rdy the inverse of the full flag. */ +module dispatcher ( + clk, + rst, + tx_tvalid, + tx_tdata, + tx_tstrb, + tx_tkeep, + tx_tid, + tx_tdest, + tx_tuser, + tx_tlast, + tx_tready, + ififo_wdata, + ififo_wen, + ififo_rdy +); + +input clk; +input rst; +// Tx interface +output tx_tvalid; +output [511:0] tx_tdata; +output [63:0] tx_tstrb; +output [63:0] tx_tkeep; +output [7:0] tx_tid; +output [7:0] tx_tdest; +output [31:0] tx_tuser; +output tx_tlast; +input tx_tready; +// External FIFO IO +input [63:0] ififo_wdata; +input ififo_wen; +output ififo_rdy; +/* fifo_almost_full_signal is declared but not referenced anywhere else in this module. */ +wire fifo_full_signal, fifo_almost_full_signal, fifo_empty_signal; +wire [511:0] fifo_rdata; +wire [511:0] fifo_wdata; +assign fifo_wdata[ 63: 0] = ififo_wdata; +assign fifo_wdata[127: 64] = ififo_wdata; +assign fifo_wdata[191:128] = ififo_wdata; +assign fifo_wdata[255:192] = ififo_wdata; +assign fifo_wdata[319:256] = ififo_wdata; +assign fifo_wdata[383:320] = ififo_wdata; +assign fifo_wdata[447:384] = ififo_wdata; +assign fifo_wdata[511:448] = ififo_wdata; +/* The FIFO is pushed on ififo_wen and popped on a completed Tx handshake (tx_tvalid && tx_tready). */ +fifo_dispatcher fifo_inst ( + .clk(clk), + .rst(rst), + .push(ififo_wen), + .idata(fifo_wdata), + .pop(tx_tvalid && tx_tready), + .odata(fifo_rdata), + .empty(fifo_empty_signal), + .full(fifo_full_signal) +); +/* Tx side-band registers: cleared on rst, otherwise reloaded every clock from the current ififo_wdata (slices and XORs of slices); they are not stored in the FIFO alongside the data word. */ +reg [63:0] r_tx_tstrb; +reg [63:0] r_tx_tkeep; +reg [7:0] r_tx_tid; +reg [7:0] r_tx_tdest; +reg [31:0] r_tx_tuser; +reg r_tx_tlast; + +always @ (posedge clk) begin + if (rst) begin + r_tx_tstrb <= 0; + r_tx_tkeep <= 0; + r_tx_tid <= 0; + r_tx_tdest <= 0; + r_tx_tuser <= 0; + r_tx_tlast <= 0; + end else begin + r_tx_tstrb <= ififo_wdata[63:0]; + r_tx_tkeep <= ififo_wdata[63:0];
+ r_tx_tid <= ififo_wdata[7:0] ^ ififo_wdata[15:8]; + r_tx_tdest <= ififo_wdata[7:0] ^ ififo_wdata[23:16]; + r_tx_tuser <= ififo_wdata[31:0] ^ ififo_wdata[63:32]; + r_tx_tlast <= ififo_wdata[63] ^ ififo_wdata[0]; + end +end + +assign tx_tstrb = r_tx_tstrb; +assign tx_tkeep = r_tx_tkeep; +assign tx_tid = r_tx_tid; +assign tx_tdest = r_tx_tdest; +assign tx_tuser = r_tx_tuser; +assign tx_tlast = r_tx_tlast; +/* Stream status comes straight from the FIFO flags: valid while not empty, ready for new words while not full. */ +assign tx_tvalid = !fifo_empty_signal; +assign tx_tdata = fifo_rdata; +assign ififo_rdy = !fifo_full_signal; + +endmodule +/* fifo_dispatcher: 512-bit-wide FIFO shell built from sixteen 32-bit bram_inst slices and two 9-bit pointers. Ports: clk; rst (synchronous, clears both pointers); push (slice write enable, advances tail_ptr); idata (512-bit write word); pop (advances head_ptr); odata (512-bit read word); empty (tail_ptr == head_ptr); full (tail_ptr+1 == head_ptr). push and pop are not gated by full or empty inside this module. */ +module fifo_dispatcher ( + clk, + rst, + push, + idata, + pop, + odata, + empty, + full +); + +input wire clk; +input wire rst; +input wire push; +input wire [511:0] idata; +input wire pop; +output wire [511:0] odata; +output reg empty; +output reg full; +/* head_ptr is the read address and tail_ptr the write address of every slice. */ +reg [8:0] head_ptr; +reg [8:0] tail_ptr; +/* Slice bN carries the N-th 32-bit lane of idata and odata (b0 = bits 31:0). */ +bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7])); +bram_inst b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]),
.waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9])); +bram_inst b10 (.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), .wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15])); +/* Pointer update: synchronous reset to 0; otherwise push and pop each advance their own pointer by one, independently of each other. */ +always @ (posedge clk) begin + if (rst) begin + head_ptr <= 0; + tail_ptr <= 0; + end else begin + if (push) tail_ptr <= tail_ptr + 1; + if (pop) head_ptr <= head_ptr + 1; + end +end +/* Status flags are purely combinational functions of the two pointers. */ +always @ (*) begin + if (tail_ptr == head_ptr) begin + empty = 1'b1; + end else begin + empty = 1'b0; + end +/* NOTE(review): the unsized literal makes tail_ptr+1 evaluate wider than 9 bits, so when tail_ptr is 511 the sum is 512 rather than 0 and full is not raised for head_ptr == 0 - confirm whether wrap-around was intended. */ + if (tail_ptr+1 == head_ptr) begin + full = 1'b1; + end else begin + full = 1'b0; + end +end + +endmodule + +/* bram_inst: 32-bit data, 9-bit address memory slice wrapping one single_port_ram instance (single_port_ram is defined outside the visible source). Ports: clk; wen (write enable); wdata (32-bit write data); waddr and raddr (9-bit addresses); rdata (32-bit read data). */ +module bram_inst( + clk, + wen, + wdata, + waddr, + raddr, + rdata +); + +input wire clk; +input wire wen; +input wire [31:0] wdata; +input wire [ 8:0] waddr; +input wire [ 8:0] raddr; +output wire [31:0] rdata; +/* The RAM data path is 40 bits: wdata is zero-padded with 8 upper bits going in, and only bits 31:0 of the RAM output are returned. */ +wire [39:0] rtemp; +wire [39:0] wtemp; +wire [8:0] addrtemp; +assign rdata = rtemp[31:0]; +assign wtemp = {8'd0, wdata}; +assign addrtemp = waddr | raddr; +/* A single address reaches the RAM: the bitwise OR of waddr and raddr. NOTE(review): presumably a deliberate simplification for this benchmark rather than a true separate-read/write-address memory - confirm. */ +single_port_ram bram_instance( + .clk(clk), + .we(wen), + .data(wtemp), + .addr(addrtemp), + .out(rtemp) +); + +endmodule diff --git
a/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_mvm.v b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_mvm.v new file mode 100644 index 00000000000..e78241c5c1a --- /dev/null +++ b/vtr_flow/benchmarks/noc/Large_Designs/MLP/shared_verilog/simple_mvm.v @@ -0,0 +1,589 @@ +/* Simplified MVM used for FPT'23 */ +/* mvm_top: MVM benchmark block with a 512-bit Rx stream (rx_*) and a 512-bit Tx stream (tx_*). rx_tid selects the write target of rx_tdata: 0 writes the instruction memory, 1 pushes the reduction FIFO, 2 pushes the input FIFO; rx_tuser[24:9] are the write enables of the sixteen register-file memories. Sixteen dpe instances each produce one 32-bit lane of dpe_results, which is written to the accumulator memory and pushed into the output FIFO that drives tx_tdata. rx_tvalid is a port but is not referenced in the module body. */ +module mvm_top ( + clk, + rst, + rx_tvalid, + rx_tdata, + rx_tstrb, + rx_tkeep, + rx_tid, + rx_tdest, + rx_tuser, + rx_tlast, + rx_tready, + tx_tvalid, + tx_tdata, + tx_tstrb, + tx_tkeep, + tx_tid, + tx_tdest, + tx_tuser, + tx_tlast, + tx_tready +); + +input wire clk; +input wire rst; +// Rx interface +input wire rx_tvalid; +input wire [511:0] rx_tdata; +input wire [ 63:0] rx_tstrb; +input wire [ 63:0] rx_tkeep; +input wire [ 7:0] rx_tid; +input wire [ 7:0] rx_tdest; +input wire [ 31:0] rx_tuser; +input wire rx_tlast; +output wire rx_tready; +// Tx interface +output wire tx_tvalid; +output wire [511:0] tx_tdata; +output wire [ 63:0] tx_tstrb; +output wire [ 63:0] tx_tkeep; +output wire [ 7:0] tx_tid; +output wire [ 7:0] tx_tdest; +output wire [ 31:0] tx_tuser; +output wire tx_tlast; +input wire tx_tready; + +// Hook up unused Rx signals to dummy registers to avoid being synthesized away +reg [63:0] dummy_rx_tstrb; +reg [63:0] dummy_rx_tkeep; +reg [63:0] dummy_rx_tdest; +/* NOTE(review): dummy_rx_tdest is 64 bits wide but is loaded from the 8-bit rx_tdest, and none of the three dummy registers is read anywhere else in this module - confirm they serve their stated purpose. */ +always @ (posedge clk) begin + dummy_rx_tstrb <= rx_tstrb; + dummy_rx_tkeep <= rx_tkeep; + dummy_rx_tdest <= rx_tdest; +end +/* Instruction memory: written with rx_tdata at address rx_tuser[8:0] when rx_tid == 0, read at inst_raddr. NOTE(review): rx_tuser[15:9] is a 7-bit slice assigned to the 9-bit inst_raddr, so the two upper address bits are always zero - confirm the intended field. */ +wire [8:0] inst_raddr; +assign inst_raddr = rx_tuser[15:9]; +wire [8:0] inst_waddr; +assign inst_waddr = rx_tuser[8:0]; +wire inst_wen; +assign inst_wen = (rx_tid == 0); +wire [511:0] inst_wdata; +assign inst_wdata = rx_tdata; +wire [511:0] inst_rdata; +wire [8:0] inst_rf_raddr, inst_accum_raddr; +wire inst_reduce, inst_accum_en, inst_release, inst_jump, inst_en, inst_last; +/* Instruction word fields decoded after the memory instance: [23:15] register-file read address, [14:6] accumulator read address, [5] last, [4] release, [3] accum_en, [2] reduce, [1] jump, [0] en. inst_jump is decoded but not used elsewhere in this module. */ +memory_block instruction_fifo (.clk(clk), .waddr(inst_waddr), .wen(inst_wen), .wdata(inst_wdata), .raddr(inst_raddr), .rdata(inst_rdata)); + +assign
inst_rf_raddr = inst_rdata[23:15]; +assign inst_accum_raddr = inst_rdata[14:6]; +assign inst_last = inst_rdata[5]; +assign inst_reduce = inst_rdata[2]; +assign inst_accum_en = inst_rdata[3]; +assign inst_release = inst_rdata[4]; +assign inst_jump = inst_rdata[1]; +assign inst_en = inst_rdata[0]; +/* Input FIFO: pushed when rx_tid == 2 and popped whenever inst_last is set; its 512-bit read word is the dataa operand of every dpe. */ +wire input_fifo_empty, input_fifo_full; +wire [511:0] input_fifo_idata; +assign input_fifo_idata = rx_tdata; +wire [511:0] input_fifo_odata; +wire input_fifo_push, input_fifo_pop; +assign input_fifo_push = (rx_tid == 2); +assign input_fifo_pop = inst_last; + +fifo_mvm input_fifo (.clk(clk), .rst(rst), .push(input_fifo_push), .idata(input_fifo_idata), .pop(input_fifo_pop), .odata(input_fifo_odata), .empty(input_fifo_empty), .full(input_fifo_full)); +/* Reduction FIFO: pushed when rx_tid == 1, popped when inst_reduce is set and the FIFO is not empty; each dpe takes one 32-bit lane of its read word as datac. */ +wire reduction_fifo_empty, reduction_fifo_full; +wire [511:0] reduction_fifo_idata; +assign reduction_fifo_idata = rx_tdata; +wire [511:0] reduction_fifo_odata; +wire reduction_fifo_push, reduction_fifo_pop; +assign reduction_fifo_push = (rx_tid == 1); +assign reduction_fifo_pop = inst_reduce && !reduction_fifo_empty; + +fifo_mvm reduction_fifo (.clk(clk), .rst(rst), .push(reduction_fifo_push), .idata(reduction_fifo_idata), .pop(reduction_fifo_pop), .odata(reduction_fifo_odata), .empty(reduction_fifo_empty), .full(reduction_fifo_full)); +/* Accumulator memory: read at inst_accum_raddr; written with dpe_results when dpe_ovalid[0] is set, at the same address after it has passed through a dpe_pipeline instance. accum_mem_waddr is declared but not used. */ +wire [8:0] accum_mem_waddr; +wire [511:0] accum_mem_rdata; +wire [17:0] temp_accum_addr, delay_accum_addr; +assign temp_accum_addr = {9'b0, inst_accum_raddr}; +/* NOTE(review): temp_accum_addr and delay_accum_addr are 18 bits wide while the dpe_pipeline data_in and data_out ports are declared 34 bits wide - confirm the width mismatch is acceptable to the target tools. */ +dpe_pipeline accum_addr_pipeline (.clk(clk), .rst(rst), .data_in(temp_accum_addr), .data_out(delay_accum_addr)); + +memory_block accum_mem (.clk(clk), .waddr(delay_accum_addr[8:0]), .wen(dpe_ovalid[0]), .wdata(dpe_results), .raddr(inst_accum_raddr), .rdata(accum_mem_rdata)); +/* Register file: sixteen 512-bit memories (rf_01 to rf_16) sharing write address rx_tuser[8:0], write data rx_tdata and read address inst_rf_raddr; rf_wen[0] enables rf_01 up to rf_wen[15] for rf_16. The write enables are taken directly from rx_tuser[24:9] with no rx_tid qualification. */ +wire [8:0] rf_waddr; +assign rf_waddr = rx_tuser[8:0]; +wire [511:0] rf_wdata; +assign rf_wdata = rx_tdata; +wire [15:0] rf_wen; +assign rf_wen = rx_tuser[24:9]; +wire [511:0] rf_rdata_1, rf_rdata_2, rf_rdata_3, rf_rdata_4, rf_rdata_5, rf_rdata_6,
rf_rdata_7, rf_rdata_8, + rf_rdata_9, rf_rdata_10, rf_rdata_11, rf_rdata_12, rf_rdata_13, rf_rdata_14, rf_rdata_15, rf_rdata_16; +/* NOTE(review): dpe_results and dpe_ovalid are declared with output direction although neither appears in the module port list, and both are referenced by the accum_mem instance before this declaration - confirm the target tools accept this. */ +output wire [511:0] dpe_results; +output wire [15:0] dpe_ovalid; + +memory_block rf_01(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[0]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_1)); +memory_block rf_02(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[1]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_2)); +memory_block rf_03(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[2]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_3)); +memory_block rf_04(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[3]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_4)); +memory_block rf_05(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[4]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_5)); +memory_block rf_06(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[5]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_6)); +memory_block rf_07(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[6]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_7)); +memory_block rf_08(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[7]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_8)); +memory_block rf_09(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[8]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_9)); +memory_block rf_10(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[9]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_10)); +memory_block rf_11(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[10]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_11)); +memory_block rf_12(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[11]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_12)); +memory_block rf_13(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[12]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_13)); +memory_block rf_14(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[13]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_14));
+memory_block rf_15(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[14]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_15)); +memory_block rf_16(.clk(clk), .waddr(rf_waddr), .wen(rf_wen[15]), .wdata(rf_wdata), .raddr(inst_rf_raddr), .rdata(rf_rdata_16)); +/* All sixteen dpe instances share ivalid (inst_en && inst_release), the accum and reduce flags and the dataa operand. dpe_NN takes rf_rdata_NN as datab and the NN-th 32-bit lane of the reduction FIFO and accumulator read words, and drives the NN-th 32-bit lane of dpe_results (dpe_01 drives bits 31:0). */ +wire dpe_ivalid; +assign dpe_ivalid = inst_en && inst_release; +dpe dpe_01 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_1), .datac(reduction_fifo_odata[(32*1)-1:32*0]), .datad(accum_mem_rdata[(32*1)-1:32*0]), .result(dpe_results[(32*1)-1:32*0]), .ovalid(dpe_ovalid[0])); +dpe dpe_02 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_2), .datac(reduction_fifo_odata[(32*2)-1:32*1]), .datad(accum_mem_rdata[(32*2)-1:32*1]), .result(dpe_results[(32*2)-1:32*1]), .ovalid(dpe_ovalid[1])); +dpe dpe_03 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_3), .datac(reduction_fifo_odata[(32*3)-1:32*2]), .datad(accum_mem_rdata[(32*3)-1:32*2]), .result(dpe_results[(32*3)-1:32*2]), .ovalid(dpe_ovalid[2])); +dpe dpe_04 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_4), .datac(reduction_fifo_odata[(32*4)-1:32*3]), .datad(accum_mem_rdata[(32*4)-1:32*3]), .result(dpe_results[(32*4)-1:32*3]), .ovalid(dpe_ovalid[3])); +dpe dpe_05 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_5), .datac(reduction_fifo_odata[(32*5)-1:32*4]), .datad(accum_mem_rdata[(32*5)-1:32*4]), .result(dpe_results[(32*5)-1:32*4]), .ovalid(dpe_ovalid[4])); +dpe dpe_06 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_6), .datac(reduction_fifo_odata[(32*6)-1:32*5]),
.datad(accum_mem_rdata[(32*6)-1:32*5]), .result(dpe_results[(32*6)-1:32*5]), .ovalid(dpe_ovalid[5])); +dpe dpe_07 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_7), .datac(reduction_fifo_odata[(32*7)-1:32*6]), .datad(accum_mem_rdata[(32*7)-1:32*6]), .result(dpe_results[(32*7)-1:32*6]), .ovalid(dpe_ovalid[6])); +dpe dpe_08 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_8), .datac(reduction_fifo_odata[(32*8)-1:32*7]), .datad(accum_mem_rdata[(32*8)-1:32*7]), .result(dpe_results[(32*8)-1:32*7]), .ovalid(dpe_ovalid[7])); +dpe dpe_09 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_9), .datac(reduction_fifo_odata[(32*9)-1:32*8]), .datad(accum_mem_rdata[(32*9)-1:32*8]), .result(dpe_results[(32*9)-1:32*8]), .ovalid(dpe_ovalid[8])); +dpe dpe_10 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_10), .datac(reduction_fifo_odata[(32*10)-1:32*9]), .datad(accum_mem_rdata[(32*10)-1:32*9]), .result(dpe_results[(32*10)-1:32*9]), .ovalid(dpe_ovalid[9])); +dpe dpe_11 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_11), .datac(reduction_fifo_odata[(32*11)-1:32*10]), .datad(accum_mem_rdata[(32*11)-1:32*10]), .result(dpe_results[(32*11)-1:32*10]), .ovalid(dpe_ovalid[10])); +dpe dpe_12 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_12), .datac(reduction_fifo_odata[(32*12)-1:32*11]), .datad(accum_mem_rdata[(32*12)-1:32*11]), .result(dpe_results[(32*12)-1:32*11]), .ovalid(dpe_ovalid[11])); +dpe dpe_13 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce),
.dataa(input_fifo_odata), .datab(rf_rdata_13), .datac(reduction_fifo_odata[(32*13)-1:32*12]), .datad(accum_mem_rdata[(32*13)-1:32*12]), .result(dpe_results[(32*13)-1:32*12]), .ovalid(dpe_ovalid[12])); +dpe dpe_14 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_14), .datac(reduction_fifo_odata[(32*14)-1:32*13]), .datad(accum_mem_rdata[(32*14)-1:32*13]), .result(dpe_results[(32*14)-1:32*13]), .ovalid(dpe_ovalid[13])); +dpe dpe_15 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_15), .datac(reduction_fifo_odata[(32*15)-1:32*14]), .datad(accum_mem_rdata[(32*15)-1:32*14]), .result(dpe_results[(32*15)-1:32*14]), .ovalid(dpe_ovalid[14])); +dpe dpe_16 (.clk(clk), .rst(rst), .ivalid(dpe_ivalid), .accum(inst_accum_en), .reduce(inst_reduce), .dataa(input_fifo_odata), .datab(rf_rdata_16), .datac(reduction_fifo_odata[(32*16)-1:32*15]), .datad(accum_mem_rdata[(32*16)-1:32*15]), .result(dpe_results[(32*16)-1:32*15]), .ovalid(dpe_ovalid[15])); +/* Output FIFO: pushed with dpe_results when dpe_ovalid[0] is set (only lane 0's valid bit is consulted), popped when tx_tready is high and the FIFO is not empty. */ +wire output_fifo_empty, output_fifo_full; +wire [511:0] output_fifo_odata; +wire output_fifo_pop; +assign output_fifo_pop = tx_tready && !output_fifo_empty; +fifo_mvm output_fifo (.clk(clk), .rst(rst), .push(dpe_ovalid[0]), .idata(dpe_results), .pop(output_fifo_pop), .odata(output_fifo_odata), .empty(output_fifo_empty), .full(output_fifo_full)); +/* Tx side-band signals are the Rx side-band inputs delayed by one register stage (cleared on rst); they do not travel through the output FIFO with the data. */ +reg [ 63:0] r_tx_tstrb; +reg [ 63:0] r_tx_tkeep; +reg [ 7:0] r_tx_tid; +reg [ 7:0] r_tx_tdest; +reg [ 31:0] r_tx_tuser; +reg r_tx_tlast; +always @ (posedge clk) begin + if (rst) begin + r_tx_tstrb <= 0; + r_tx_tkeep <= 0; + r_tx_tid <= 0; + r_tx_tdest <= 0; + r_tx_tuser <= 0; + r_tx_tlast <= 0; + end else begin + r_tx_tstrb <= rx_tstrb; + r_tx_tkeep <= rx_tkeep; + r_tx_tid <= rx_tid; + r_tx_tdest <= rx_tdest; + r_tx_tuser <= rx_tuser; + r_tx_tlast <= rx_tlast; + end +end +/* NOTE(review): tx_tvalid is gated by tx_tready, so valid is only raised while the consumer is already ready - confirm this is acceptable for the stream protocol in use. */ +assign tx_tvalid = tx_tready && !output_fifo_empty; +assign
tx_tdata = output_fifo_odata; +assign tx_tstrb = r_tx_tstrb; +assign tx_tkeep = r_tx_tkeep; +assign tx_tid = r_tx_tid; +assign tx_tdest = r_tx_tdest; +assign tx_tuser = r_tx_tuser; +assign tx_tlast = r_tx_tlast; +assign rx_tready = !input_fifo_full; + +endmodule + +/* dpe: one 32-bit compute lane of mvm_top (the name presumably stands for dot-product engine - confirm). dataa and datab (512 bits each) are cut into thirty-two 16-bit element pairs and fed, two pairs per block, to sixteen chained dsp_inst blocks (dsp_inst is defined outside the visible source). datac and datad are bundled with ivalid and one control flag each and delayed through dpe_pipeline. While the delayed ivalid is set, result is loaded with res00 plus the delayed datac and/or datad as selected by the delayed flags; otherwise it holds. ovalid is the registered AND of the two delayed ivalid bits. */ +module dpe ( + clk, + rst, + ivalid, + accum, + reduce, + dataa, + datab, + datac, + datad, + result, + ovalid +); + +input wire clk; +input wire rst; +input wire ivalid; +input wire accum; +input wire reduce; +input wire [511:0] dataa; +input wire [511:0] datab; +input wire [ 31:0] datac; +input wire [ 31:0] datad; +output wire [ 31:0] result; +output wire ovalid; + +wire [36:0] chain_atom01_to_atom00, chain_atom02_to_atom01, chain_atom03_to_atom02, chain_atom04_to_atom03, + chain_atom05_to_atom04, chain_atom06_to_atom05, chain_atom07_to_atom06, chain_atom08_to_atom07, + chain_atom09_to_atom08, chain_atom10_to_atom09, chain_atom11_to_atom10, chain_atom12_to_atom11, + chain_atom13_to_atom12, chain_atom14_to_atom13, chain_atom15_to_atom14, dummy_chain; +wire [31:0] res15, res14, res13, res12, res11, res10, res09, res08, res07, res06, res05, res04, res03, res02, res01, res00; +/* Bundle layout: bit 33 = ivalid, bit 32 = control flag, bits 31:0 = data. NOTE(review): datac travels with the accum flag and datad with the reduce flag, while mvm_top connects datac to the reduction FIFO and datad to the accumulator memory - confirm the pairing is intended. */ +wire [33:0] temp_datac, temp_datad, delay_datac, delay_datad; +assign temp_datac = {ivalid, accum, datac}; +assign temp_datad = {ivalid, reduce, datad}; + +dpe_pipeline datac_pipe (.clk(clk), .rst(rst), .data_in(temp_datac), .data_out(delay_datac)); +dpe_pipeline datad_pipe (.clk(clk), .rst(rst), .data_in(temp_datad), .data_out(delay_datad)); +/* DSP chain: d15 starts the chain with chainin = 0 and each chainout feeds the chainin of the next lower-numbered block; only res00 (from d00) is used by the result logic, while res01 to res15 and dummy_chain drive nothing else in this module. */ +dsp_inst d15(.clk(clk), .reset(rst), .ax(dataa[ (16*1)-1: 16*0]), .ay(datab[ (16*1)-1: 16*0]), .bx(dataa[ (16*2)-1: 16*1]), .by(datab[ (16*2)-1: 16*1]), .chainin( 37'd0), .result(res15), .chainout(chain_atom15_to_atom14)); +dsp_inst d14(.clk(clk), .reset(rst), .ax(dataa[ (16*3)-1: 16*2]), .ay(datab[ (16*3)-1: 16*2]), .bx(dataa[ (16*4)-1: 16*3]), .by(datab[ (16*4)-1: 16*3]), .chainin(chain_atom15_to_atom14), .result(res14), .chainout(chain_atom14_to_atom13)); +dsp_inst d13(.clk(clk),
.reset(rst), .ax(dataa[ (16*5)-1: 16*4]), .ay(datab[ (16*5)-1: 16*4]), .bx(dataa[ (16*6)-1: 16*5]), .by(datab[ (16*6)-1: 16*5]), .chainin(chain_atom14_to_atom13), .result(res13), .chainout(chain_atom13_to_atom12)); +dsp_inst d12(.clk(clk), .reset(rst), .ax(dataa[ (16*7)-1: 16*6]), .ay(datab[ (16*7)-1: 16*6]), .bx(dataa[ (16*8)-1: 16*7]), .by(datab[ (16*8)-1: 16*7]), .chainin(chain_atom13_to_atom12), .result(res12), .chainout(chain_atom12_to_atom11)); +dsp_inst d11(.clk(clk), .reset(rst), .ax(dataa[ (16*9)-1: 16*8]), .ay(datab[ (16*9)-1: 16*8]), .bx(dataa[(16*10)-1: 16*9]), .by(datab[(16*10)-1: 16*9]), .chainin(chain_atom12_to_atom11), .result(res11), .chainout(chain_atom11_to_atom10)); +dsp_inst d10(.clk(clk), .reset(rst), .ax(dataa[(16*11)-1:16*10]), .ay(datab[(16*11)-1:16*10]), .bx(dataa[(16*12)-1:16*11]), .by(datab[(16*12)-1:16*11]), .chainin(chain_atom11_to_atom10), .result(res10), .chainout(chain_atom10_to_atom09)); +dsp_inst d09(.clk(clk), .reset(rst), .ax(dataa[(16*13)-1:16*12]), .ay(datab[(16*13)-1:16*12]), .bx(dataa[(16*14)-1:16*13]), .by(datab[(16*14)-1:16*13]), .chainin(chain_atom10_to_atom09), .result(res09), .chainout(chain_atom09_to_atom08)); +dsp_inst d08(.clk(clk), .reset(rst), .ax(dataa[(16*15)-1:16*14]), .ay(datab[(16*15)-1:16*14]), .bx(dataa[(16*16)-1:16*15]), .by(datab[(16*16)-1:16*15]), .chainin(chain_atom09_to_atom08), .result(res08), .chainout(chain_atom08_to_atom07)); +dsp_inst d07(.clk(clk), .reset(rst), .ax(dataa[(16*17)-1:16*16]), .ay(datab[(16*17)-1:16*16]), .bx(dataa[(16*18)-1:16*17]), .by(datab[(16*18)-1:16*17]), .chainin(chain_atom08_to_atom07), .result(res07), .chainout(chain_atom07_to_atom06)); +dsp_inst d06(.clk(clk), .reset(rst), .ax(dataa[(16*19)-1:16*18]), .ay(datab[(16*19)-1:16*18]), .bx(dataa[(16*20)-1:16*19]), .by(datab[(16*20)-1:16*19]), .chainin(chain_atom07_to_atom06), .result(res06), .chainout(chain_atom06_to_atom05)); +dsp_inst d05(.clk(clk), .reset(rst), .ax(dataa[(16*21)-1:16*20]), .ay(datab[(16*21)-1:16*20]),
.bx(dataa[(16*22)-1:16*21]), .by(datab[(16*22)-1:16*21]), .chainin(chain_atom06_to_atom05), .result(res05), .chainout(chain_atom05_to_atom04)); +dsp_inst d04(.clk(clk), .reset(rst), .ax(dataa[(16*23)-1:16*22]), .ay(datab[(16*23)-1:16*22]), .bx(dataa[(16*24)-1:16*23]), .by(datab[(16*24)-1:16*23]), .chainin(chain_atom05_to_atom04), .result(res04), .chainout(chain_atom04_to_atom03)); +dsp_inst d03(.clk(clk), .reset(rst), .ax(dataa[(16*25)-1:16*24]), .ay(datab[(16*25)-1:16*24]), .bx(dataa[(16*26)-1:16*25]), .by(datab[(16*26)-1:16*25]), .chainin(chain_atom04_to_atom03), .result(res03), .chainout(chain_atom03_to_atom02)); +dsp_inst d02(.clk(clk), .reset(rst), .ax(dataa[(16*27)-1:16*26]), .ay(datab[(16*27)-1:16*26]), .bx(dataa[(16*28)-1:16*27]), .by(datab[(16*28)-1:16*27]), .chainin(chain_atom03_to_atom02), .result(res02), .chainout(chain_atom02_to_atom01)); +dsp_inst d01(.clk(clk), .reset(rst), .ax(dataa[(16*29)-1:16*28]), .ay(datab[(16*29)-1:16*28]), .bx(dataa[(16*30)-1:16*29]), .by(datab[(16*30)-1:16*29]), .chainin(chain_atom02_to_atom01), .result(res01), .chainout(chain_atom01_to_atom00)); +dsp_inst d00(.clk(clk), .reset(rst), .ax(dataa[(16*31)-1:16*30]), .ay(datab[(16*31)-1:16*30]), .bx(dataa[(16*32)-1:16*31]), .by(datab[(16*32)-1:16*31]), .chainin(chain_atom01_to_atom00), .result(res00), .chainout( dummy_chain)); +/* Output stage: d00 ends the DSP chain and its res00 is the only DSP result consumed here. */ +reg [31:0] r_result; +reg r_ovalid; +/* delay_datac[33] and delay_datad[33] are the delayed ivalid bits; delay_datac[32] is the delayed accum flag and delay_datad[32] the delayed reduce flag. r_result is only updated while delay_datac[33] is set and holds its value otherwise; r_ovalid is refreshed every clock. */ +always @ (posedge clk) begin + if (rst) begin + r_result <= 0; + r_ovalid <= 1'b0; + end else begin + if (delay_datac[33]) begin + if (delay_datac[32] && delay_datad[32]) begin + r_result <= res00 + delay_datac[31:0] + delay_datad[31:0]; + end else if (delay_datac[32] && !delay_datad[32]) begin + r_result <= res00 + delay_datac[31:0]; + end else if (!delay_datac[32] && delay_datad[32]) begin + r_result <= res00 + delay_datad[31:0]; + end else begin + r_result <= res00; + end + end + r_ovalid <= delay_datac[33] && delay_datad[33]; + end +end + +assign result = r_result; +assign ovalid = r_ovalid; + +endmodule +
+module memory_block ( + clk, + waddr, + wen, + wdata, + raddr, + rdata +); +/* memory_block: 512-bit-wide, 9-bit-address memory assembled from sixteen 32-bit bram_inst slices. Every slice receives the same clk, wen, waddr and raddr; slice bN stores and returns the N-th 32-bit lane of wdata and rdata (b0 = bits 31:0). */ +input wire clk; +input wire [ 8:0] waddr; +input wire wen; +input wire [511:0] wdata; +input wire [ 8:0] raddr; +output wire [511:0] rdata; + +bram_inst b0 (.clk(clk), .wen(wen), .wdata(wdata[(32*1)-1:32*0]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(wen), .wdata(wdata[(32*2)-1:32*1]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(wen), .wdata(wdata[(32*3)-1:32*2]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(wen), .wdata(wdata[(32*4)-1:32*3]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(wen), .wdata(wdata[(32*5)-1:32*4]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(wen), .wdata(wdata[(32*6)-1:32*5]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(wen), .wdata(wdata[(32*7)-1:32*6]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(wen), .wdata(wdata[(32*8)-1:32*7]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*8)-1:32*7])); +bram_inst b8 (.clk(clk), .wen(wen), .wdata(wdata[(32*9)-1:32*8]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(wen), .wdata(wdata[(32*10)-1:32*9]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*10)-1:32*9])); +bram_inst b10 (.clk(clk), .wen(wen), .wdata(wdata[(32*11)-1:32*10]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(wen), .wdata(wdata[(32*12)-1:32*11]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(wen), .wdata(wdata[(32*13)-1:32*12]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(wen), .wdata(wdata[(32*14)-1:32*13]), .waddr(waddr),
.raddr(raddr), .rdata(rdata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(wen), .wdata(wdata[(32*15)-1:32*14]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), .wen(wen), .wdata(wdata[(32*16)-1:32*15]), .waddr(waddr), .raddr(raddr), .rdata(rdata[(32*16)-1:32*15])); + +endmodule +/* fifo_mvm: 512-bit-wide FIFO shell built from sixteen 32-bit bram_inst slices and two 9-bit pointers. Ports: clk; rst (synchronous, clears both pointers); push (slice write enable, advances tail_ptr); idata (512-bit write word); pop (advances head_ptr); odata (512-bit read word); empty (tail_ptr == head_ptr); full (tail_ptr+1 == head_ptr). push and pop are not gated by full or empty inside this module. */ +module fifo_mvm ( + clk, + rst, + push, + idata, + pop, + odata, + empty, + full +); + +input wire clk; +input wire rst; +input wire push; +input wire [511:0] idata; +input wire pop; +output wire [511:0] odata; +output reg empty; +output reg full; +/* head_ptr is the read address and tail_ptr the write address of every slice. */ +reg [8:0] head_ptr; +reg [8:0] tail_ptr; +/* Slice bN carries the N-th 32-bit lane of idata and odata (b0 = bits 31:0). */ +bram_inst b0 (.clk(clk), .wen(push), .wdata(idata[(32*1)-1:32*0]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*1)-1:32*0])); +bram_inst b1 (.clk(clk), .wen(push), .wdata(idata[(32*2)-1:32*1]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*2)-1:32*1])); +bram_inst b2 (.clk(clk), .wen(push), .wdata(idata[(32*3)-1:32*2]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*3)-1:32*2])); +bram_inst b3 (.clk(clk), .wen(push), .wdata(idata[(32*4)-1:32*3]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*4)-1:32*3])); +bram_inst b4 (.clk(clk), .wen(push), .wdata(idata[(32*5)-1:32*4]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*5)-1:32*4])); +bram_inst b5 (.clk(clk), .wen(push), .wdata(idata[(32*6)-1:32*5]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*6)-1:32*5])); +bram_inst b6 (.clk(clk), .wen(push), .wdata(idata[(32*7)-1:32*6]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*7)-1:32*6])); +bram_inst b7 (.clk(clk), .wen(push), .wdata(idata[(32*8)-1:32*7]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*8)-1:32*7])); +bram_inst b8 (.clk(clk), .wen(push), .wdata(idata[(32*9)-1:32*8]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*9)-1:32*8])); +bram_inst b9 (.clk(clk), .wen(push), .wdata(idata[(32*10)-1:32*9]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*10)-1:32*9])); +bram_inst b10
(.clk(clk), .wen(push), .wdata(idata[(32*11)-1:32*10]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*11)-1:32*10])); +bram_inst b11 (.clk(clk), .wen(push), .wdata(idata[(32*12)-1:32*11]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*12)-1:32*11])); +bram_inst b12 (.clk(clk), .wen(push), .wdata(idata[(32*13)-1:32*12]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*13)-1:32*12])); +bram_inst b13 (.clk(clk), .wen(push), .wdata(idata[(32*14)-1:32*13]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*14)-1:32*13])); +bram_inst b14 (.clk(clk), .wen(push), .wdata(idata[(32*15)-1:32*14]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*15)-1:32*14])); +bram_inst b15 (.clk(clk), .wen(push), .wdata(idata[(32*16)-1:32*15]), .waddr(tail_ptr), .raddr(head_ptr), .rdata(odata[(32*16)-1:32*15])); + +always @ (posedge clk) begin + if (rst) begin + head_ptr <= 0; + tail_ptr <= 0; + end else begin + if (push) tail_ptr <= tail_ptr + 1; + if (pop) head_ptr <= head_ptr + 1; + end +end + +always @ (*) begin + if (tail_ptr == head_ptr) begin + empty = 1'b1; + end else begin + empty = 1'b0; + end + + if (tail_ptr+1 == head_ptr) begin + full = 1'b1; + end else begin + full = 1'b0; + end +end + +endmodule + + +module bram_inst( + clk, + wen, + wdata, + waddr, + raddr, + rdata +); + +input wire clk; +input wire wen; +input wire [31:0] wdata; +input wire [ 8:0] waddr; +input wire [ 8:0] raddr; +output wire [31:0] rdata; + +wire [39:0] rtemp; +wire [39:0] wtemp; +wire [8:0] addrtemp; +assign rdata = rtemp[31:0]; +assign wtemp = {8'd0, wdata}; +assign addrtemp = waddr | raddr; + +single_port_ram bram_instance( + .clk(clk), + .we(wen), + .data(wtemp), + .addr(addrtemp), + .out(rtemp) +); + +endmodule + + +module dpe_pipeline ( + clk, + rst, + data_in, + data_out +); + +input wire clk; +input wire rst; +input wire [33:0] data_in; +output wire [33:0] data_out; + +reg [33:0] r_pipeline_00; +reg [33:0] r_pipeline_01; +reg [33:0] r_pipeline_02; +reg [33:0] 
r_pipeline_03; +reg [33:0] r_pipeline_04; +reg [33:0] r_pipeline_05; +reg [33:0] r_pipeline_06; +reg [33:0] r_pipeline_07; +reg [33:0] r_pipeline_08; +reg [33:0] r_pipeline_09; +reg [33:0] r_pipeline_10; +reg [33:0] r_pipeline_11; +reg [33:0] r_pipeline_12; +reg [33:0] r_pipeline_13; +reg [33:0] r_pipeline_14; +reg [33:0] r_pipeline_15; +reg [33:0] r_pipeline_16; +reg [33:0] r_pipeline_17; +reg [33:0] r_pipeline_18; +reg [33:0] r_pipeline_19; +reg [33:0] r_pipeline_20; +reg [33:0] r_pipeline_21; +reg [33:0] r_pipeline_22; +reg [33:0] r_pipeline_23; +reg [33:0] r_pipeline_24; +reg [33:0] r_pipeline_25; +reg [33:0] r_pipeline_26; +reg [33:0] r_pipeline_27; +reg [33:0] r_pipeline_28; +reg [33:0] r_pipeline_29; +reg [33:0] r_pipeline_30; +reg [33:0] r_pipeline_31; + +always @ (posedge clk) begin + if (rst) begin + r_pipeline_00 <= 0; + r_pipeline_01 <= 0; + r_pipeline_02 <= 0; + r_pipeline_03 <= 0; + r_pipeline_04 <= 0; + r_pipeline_05 <= 0; + r_pipeline_06 <= 0; + r_pipeline_07 <= 0; + r_pipeline_08 <= 0; + r_pipeline_09 <= 0; + r_pipeline_10 <= 0; + r_pipeline_11 <= 0; + r_pipeline_12 <= 0; + r_pipeline_13 <= 0; + r_pipeline_14 <= 0; + r_pipeline_15 <= 0; + r_pipeline_16 <= 0; + r_pipeline_17 <= 0; + r_pipeline_18 <= 0; + r_pipeline_19 <= 0; + r_pipeline_20 <= 0; + r_pipeline_21 <= 0; + r_pipeline_22 <= 0; + r_pipeline_23 <= 0; + r_pipeline_24 <= 0; + r_pipeline_25 <= 0; + r_pipeline_26 <= 0; + r_pipeline_27 <= 0; + r_pipeline_28 <= 0; + r_pipeline_29 <= 0; + r_pipeline_30 <= 0; + r_pipeline_31 <= 0; + end else begin + r_pipeline_00 <= r_pipeline_01; + r_pipeline_01 <= r_pipeline_02; + r_pipeline_02 <= r_pipeline_03; + r_pipeline_03 <= r_pipeline_04; + r_pipeline_04 <= r_pipeline_05; + r_pipeline_05 <= r_pipeline_06; + r_pipeline_06 <= r_pipeline_07; + r_pipeline_07 <= r_pipeline_08; + r_pipeline_08 <= r_pipeline_09; + r_pipeline_09 <= r_pipeline_10; + r_pipeline_10 <= r_pipeline_11; + r_pipeline_11 <= r_pipeline_12; + r_pipeline_12 <= r_pipeline_13; + r_pipeline_13 
<= r_pipeline_14; + r_pipeline_14 <= r_pipeline_15; + r_pipeline_15 <= r_pipeline_16; + r_pipeline_16 <= r_pipeline_17; + r_pipeline_17 <= r_pipeline_18; + r_pipeline_18 <= r_pipeline_19; + r_pipeline_19 <= r_pipeline_20; + r_pipeline_20 <= r_pipeline_21; + r_pipeline_21 <= r_pipeline_22; + r_pipeline_22 <= r_pipeline_23; + r_pipeline_23 <= r_pipeline_24; + r_pipeline_24 <= r_pipeline_25; + r_pipeline_25 <= r_pipeline_26; + r_pipeline_26 <= r_pipeline_27; + r_pipeline_27 <= r_pipeline_28; + r_pipeline_28 <= r_pipeline_29; + r_pipeline_29 <= r_pipeline_30; + r_pipeline_30 <= r_pipeline_31; + r_pipeline_31 <= data_in; + end +end + +assign data_out = r_pipeline_00; + +endmodule + + +module dsp_inst( + clk, + reset, + ax, + ay, + bx, + by, + chainin, + result, + chainout +); + +input wire clk; +input wire reset; +input wire [15:0] ax; +input wire [15:0] ay; +input wire [15:0] bx; +input wire [15:0] by; +input wire [36:0] chainin; +output wire [31:0] result; +output wire [36:0] chainout; + +wire [18:0] tmp_ax; +wire [19:0] tmp_ay; +wire [18:0] tmp_bx; +wire [19:0] tmp_by; +wire [36:0] tmp_result; + +assign tmp_ax = {2'b0, ax}; +assign tmp_ay = {3'b0, ay}; +assign tmp_bx = {2'b0, bx}; +assign tmp_by = {3'b0, by}; +assign result = tmp_result[31:0]; + +int_sop_2 dsp_instance( + .clk(clk), + .reset(reset), + .ax(tmp_ax), + .ay(tmp_ay), + .bx(tmp_bx), + .by(tmp_by), + .chainin(chainin), + .result(tmp_result), + .chainout(chainout) +); + +endmodule \ No newline at end of file diff --git a/vtr_flow/scripts/python_libs/vtr/__init__.py b/vtr_flow/scripts/python_libs/vtr/__init__.py index 49211fe1456..6a0b38d7639 100644 --- a/vtr_flow/scripts/python_libs/vtr/__init__.py +++ b/vtr_flow/scripts/python_libs/vtr/__init__.py @@ -11,10 +11,12 @@ format_elapsed_time, write_tab_delimitted_csv, load_list_file, + argparse_use_previous, argparse_str2bool, - get_next_run_dir, + get_existing_run_dir, get_latest_run_dir, get_latest_run_number, + get_next_run_dir, verify_file, 
pretty_print_table, find_task_dir, diff --git a/vtr_flow/scripts/python_libs/vtr/flow.py b/vtr_flow/scripts/python_libs/vtr/flow.py index 0aab0f8f3a3..a1f14e2816e 100644 --- a/vtr_flow/scripts/python_libs/vtr/flow.py +++ b/vtr_flow/scripts/python_libs/vtr/flow.py @@ -56,6 +56,7 @@ def run( relax_w_factor=1.3, check_route=False, check_place=False, + no_second_run=False, ): """ Runs the VTR CAD flow to map the specified circuit_file onto the target architecture_file @@ -130,6 +131,9 @@ def run( check_place: Route existing placement by enabling VPR routing. + + no_second_run: + Don't run VPR again even if it's writing out some intermediate files. """ # @@ -300,6 +304,9 @@ def run( ): do_second_run = True + if no_second_run: + do_second_run = False + vtr.vpr.run( architecture_copy, pre_vpr_netlist, diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py index cd51bd0b403..6bf898a5d22 100644 --- a/vtr_flow/scripts/python_libs/vtr/task.py +++ b/vtr_flow/scripts/python_libs/vtr/task.py @@ -1,20 +1,24 @@ """ Module that contains the task functions """ +import itertools + from pathlib import Path from pathlib import PurePath from shlex import split -import itertools + +from typing import List, Tuple from vtr import ( VtrError, InspectError, load_list_file, load_parse_results, + get_existing_run_dir, + get_latest_run_dir, get_next_run_dir, find_task_dir, load_script_param, - get_latest_run_dir, paths, ) @@ -82,7 +86,7 @@ def __init__( class Job: """ - A class to store the nessesary information for a job that needs to be run. + A class to store the necessary information for a job that needs to be run. 
""" def __init__( @@ -169,7 +173,7 @@ def qor_parse_command(self): """ return self._qor_parse_command - def work_dir(self, run_dir): + def work_dir(self, run_dir: str) -> str: """ return the work directory of the job """ @@ -179,7 +183,7 @@ def work_dir(self, run_dir): # pylint: enable=too-many-instance-attributes -def load_task_config(config_file): +def load_task_config(config_file) -> TaskConfig: """ Load task config information """ @@ -245,7 +249,7 @@ def load_task_config(config_file): else: # All valid keys should have been collected by now raise VtrError( - "Unrecognzied key '{key}' in config file {file}".format(key=key, file=config_file) + "Unrecognized key '{key}' in config file {file}".format(key=key, file=config_file) ) # We split the script params into a list @@ -351,7 +355,10 @@ def create_second_parse_cmd(config): return second_parse_cmd -def create_cmd(abs_circuit_filepath, abs_arch_filepath, config, args, circuit, noc_traffic): +# pylint: disable=too-many-branches +def create_cmd( + abs_circuit_filepath, abs_arch_filepath, config, args, circuit, noc_traffic +) -> Tuple: """ Create the command to run the task """ # Collect any extra script params from the config file cmd = [abs_circuit_filepath, abs_arch_filepath] @@ -410,6 +417,20 @@ def create_cmd(abs_circuit_filepath, abs_arch_filepath, config, args, circuit, n cmd += ["--fix_clusters", "{}".format(place_constr_file)] + # parse_vtr_task doesn't have these in args, so use getattr here + if getattr(args, "write_rr_graphs", None): + cmd += [ + "--write_rr_graph", + "{}.rr_graph.xml".format(Path(circuit).stem), + ] # Use XML format instead of capnp (see #2352) + + if getattr(args, "write_lookaheads", None): + cmd += ["--write_router_lookahead", "{}.lookahead.bin".format(Path(circuit).stem)] + + if getattr(args, "write_rr_graphs", None) or getattr(args, "write_lookaheads", None): + # Don't trigger a second run, we just want the files + cmd += ["-no_second_run"] + parse_cmd = None qor_parse_command = None 
if config.parse_file: @@ -446,7 +467,7 @@ def create_cmd(abs_circuit_filepath, abs_arch_filepath, config, args, circuit, n # pylint: disable=too-many-branches -def create_jobs(args, configs, after_run=False): +def create_jobs(args, configs, after_run=False) -> List[Job]: """ Create the jobs to be executed depending on the configs. """ @@ -539,7 +560,7 @@ def create_job( work_dir, run_dir, golden_results, -): +) -> Job: """ Create an individual job with the specified parameters """ @@ -607,6 +628,15 @@ def create_job( current_cmd = cmd.copy() current_cmd += ["-temp_dir", run_dir + "/{}".format(param_string)] + if getattr(args, "use_previous", None): + for prev_run, [extension, option] in args.use_previous: + prev_run_dir = get_existing_run_dir(find_task_dir(config, args.alt_tasks_dir), prev_run) + prev_work_path = Path(prev_run_dir) / work_dir / param_string + prev_file = prev_work_path / "{}.{}".format(Path(circuit).stem, extension) + if not prev_file.exists(): + raise FileNotFoundError("use_previous: file %s not found" % str(prev_file)) + current_cmd += [option, str(prev_file)] + if param_string != "common": current_cmd += param.split(" ") diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py index 6243063c6ea..8eec41661ba 100644 --- a/vtr_flow/scripts/python_libs/vtr/util.py +++ b/vtr_flow/scripts/python_libs/vtr/util.py @@ -1,16 +1,21 @@ """ Module to utilize many of the tools needed for VTR. 
""" -from pathlib import PurePath -from pathlib import Path + import sys import re import time import subprocess import argparse import csv + from collections import OrderedDict +from pathlib import PurePath +from pathlib import Path +from typing import List, Tuple + from prettytable import PrettyTable + import vtr.error from vtr.error import CommandError from vtr import paths @@ -335,7 +340,7 @@ def relax_w(min_w, relax_factor, base=2): return relaxed_w -def load_list_file(list_file): +def load_list_file(list_file: str) -> List[str]: """ Loads a file containing a single value-per-line, potentially with '#' comments @@ -429,6 +434,39 @@ def format_elapsed_time(time_delta): return "%.2f seconds" % time_delta.total_seconds() +# Files that can be read back by VPR with their conventional extensions +# and the command line option to read them. +REUSABLE_FILES = { + "net": ["net", "--net_file"], + "place": ["place", "--place_file"], + "route": ["route", "--route_file"], + "rr_graph": ["rr_graph.xml", "--read_rr_graph"], + "lookahead": ["lookahead.bin", "--read_router_lookahead"], +} + + +def argparse_use_previous(inp: str) -> List[Tuple[str, List]]: + """ + Parse a -use_previous parameter. Throw if not valid. + Returns a list with (run dir name, [extension, cmdline option]) elements. 
+ """ + tokens = [w.strip() for w in inp.split(",")] + tokens = [w for w in tokens if len(w)] + out = [] + for w in tokens: + r = re.fullmatch(r"(\w+):(\w+)", w) + if not r: + raise argparse.ArgumentTypeError("Invalid input to -use_previous: %s" % w) + if not REUSABLE_FILES.get(r.group(2)): + raise argparse.ArgumentTypeError( + "Unknown file type to use_previous: %s, available types: %s" + % (r.group(2), ",".join(REUSABLE_FILES.keys())) + ) + out.append((r.group(1), REUSABLE_FILES[r.group(2)])) + + return out + + def argparse_str2bool(str_val): """ parses a string boolean to a boolean @@ -481,6 +519,18 @@ def get_latest_run_dir(base_dir): return str(PurePath(base_dir) / run_dir_name(latest_run_number)) +def get_existing_run_dir(base_dir: str, run_dir: str) -> str: + """ + Get an existing run directory (from a previous run). Throw if it doesn't exist + """ + path = Path(base_dir) / run_dir + if not path.exists(): + raise FileNotFoundError( + "Couldn't find previous run directory %s in %s" % (base_dir, run_dir) + ) + return str(path) + + def get_next_run_number(base_dir): """ Returns the next available (i.e. 
non-existing) run number in base_dir diff --git a/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py b/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py index d0e5953fbe0..003adb9f8cb 100644 --- a/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py +++ b/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py @@ -7,7 +7,7 @@ from vtr import CommandRunner, relax_w, determine_min_w, verify_file, paths from vtr.error import InspectError -# pylint: disable=too-many-arguments +# pylint: disable=too-many-arguments,too-many-locals def run_relax_w( architecture, circuit, @@ -70,13 +70,15 @@ def run_relax_w( vpr_min_w_log = ".".join([logfile_base, "out"]) vpr_relaxed_w_log = ".".join([logfile_base, "crit_path", "out"]) - crit_path_router_iterations = None + crit_path_router_iterations = None if "crit_path_router_iterations" in vpr_args: crit_path_router_iterations = vpr_args["crit_path_router_iterations"] del vpr_args["crit_path_router_iterations"] - if "write_rr_graph" in vpr_args: + write_rr_graph = None + if "write_rr_graph" in vpr_args: # Don't write out rr_graph on the first run + write_rr_graph = vpr_args["write_rr_graph"] del vpr_args["write_rr_graph"] if vpr_exec is None: @@ -105,9 +107,11 @@ def run_relax_w( vpr_args["route"] = True # Re-route only vpr_args["route_chan_width"] = relaxed_w # At a fixed channel width + if write_rr_graph: # Write out rr_graph with known W + vpr_args["write_rr_graph"] = write_rr_graph + # VPR does not support performing routing when fixed pins # are specified, and placement is not run; so remove the option - run( architecture, circuit, diff --git a/vtr_flow/scripts/run_vtr_flow.py b/vtr_flow/scripts/run_vtr_flow.py index 7a03918e80b..118ab030186 100755 --- a/vtr_flow/scripts/run_vtr_flow.py +++ b/vtr_flow/scripts/run_vtr_flow.py @@ -187,7 +187,7 @@ def vtr_command_argparser(prog=None): house_keeping.add_argument( "-temp_dir", default=None, - help="Directory to run the flow in (will be created if non-existant).", + help="Directory to run the flow in (will be created if 
non-existent).", ) house_keeping.add_argument("-name", default=None, help="Name for this run to be output.") @@ -398,11 +398,17 @@ def vtr_command_argparser(prog=None): action="store_true", help="Tells VPR to verify the routing resource graph.", ) + vpr.add_argument( + "-no_second_run", + default=False, + action="store_true", + help="Don't run VPR a second time to check if it can read intermediate files.", + ) vpr.add_argument( "-rr_graph_ext", default=".xml", type=str, - help="Determines the output rr_graph files' extention.", + help="Determines the output rr_graph files' extension.", ) vpr.add_argument( "-check_route", @@ -575,6 +581,7 @@ def vtr_command_main(arg_list, prog=None): relax_w_factor=args.relax_w_factor, check_route=args.check_route, check_place=args.check_place, + no_second_run=args.no_second_run, ) error_status = "OK" except vtr.VtrError as error: @@ -583,7 +590,7 @@ def vtr_command_main(arg_list, prog=None): ) except KeyboardInterrupt as error: - print("{} recieved keyboard interrupt".format(prog)) + print("{} received keyboard interrupt".format(prog)) exit_status = 4 return_status = exit_status diff --git a/vtr_flow/scripts/run_vtr_task.py b/vtr_flow/scripts/run_vtr_task.py index 51a1d4bf9f4..0d9c5013181 100755 --- a/vtr_flow/scripts/run_vtr_task.py +++ b/vtr_flow/scripts/run_vtr_task.py @@ -3,18 +3,18 @@ """ This module is a wrapper around the scripts/python_libs/vtr, allowing the user to run one or more VTR tasks. 
""" - -from pathlib import Path -from pathlib import PurePath -import sys -import os import argparse -import textwrap +import os import subprocess -from datetime import datetime +import sys +import textwrap + from contextlib import redirect_stdout -from multiprocessing import Pool, Manager +from datetime import datetime from difflib import SequenceMatcher +from multiprocessing import Pool, Manager +from pathlib import Path +from pathlib import PurePath from run_vtr_flow import vtr_command_main as run_vtr_flow @@ -26,6 +26,7 @@ format_elapsed_time, RawDefaultHelpFormatter, argparse_str2bool, + argparse_use_previous, get_next_run_dir, load_task_config, find_task_config_file, @@ -202,6 +203,34 @@ def vtr_command_argparser(prog=None): help="Print meta-data like command-line arguments and run-time", ) + parser.add_argument( + "-write_rr_graphs", + default=False, + action="store_true", + help="Write out rr_graph files from VPR. These are normally computed on the fly" + "and can become very large. Typically used with -use_previous [...] to save time" + "on later executions for large tasks.", + ) + + parser.add_argument( + "-write_lookaheads", + default=False, + action="store_true", + help="Write out router lookahead files from VPR. These are normally computed on the fly" + "and can become very large. Typically used with -use_previous [...] to save time on" + "later executions for large tasks.", + ) + + parser.add_argument( + "-use_previous", + default=None, + type=argparse_use_previous, + help="Reuse intermediate [file]s from previous [run]s of the tasks. Accepts a comma" + 'separated list of [run]:[file] such as "-use_previous run001:place,run001:net".' + 'Works throughout different config parameters: "common" will reuse "common"\'s files etc.' 
+ "Use with caution and try to validate your results with a clean run.", + ) + parser.add_argument( "-s", nargs=argparse.REMAINDER, @@ -214,7 +243,7 @@ def vtr_command_argparser(prog=None): return parser -def vtr_command_main(arg_list, prog=None): +def vtr_command_main(arg_list, prog=None) -> int: """Run the vtr tasks given and the tasks in the lists given""" # Load the arguments args = vtr_command_argparser(prog).parse_args(arg_list) @@ -266,10 +295,7 @@ def vtr_command_main(arg_list, prog=None): return num_failed -def run_tasks( - args, - configs, -): +def run_tasks(args, configs) -> int: """ Runs the specified set of tasks (configs) """ @@ -278,6 +304,7 @@ def run_tasks( jobs = create_jobs(args, configs) + # Determine the run dir for each config run_dirs = {} for config in configs: task_dir = find_task_dir(config, args.alt_tasks_dir) @@ -324,24 +351,22 @@ def run_tasks( return num_failed -def run_parallel(args, queued_jobs, run_dirs): +def run_parallel(args, queued_jobs, run_dirs: dict) -> int: """ Run each external command in commands with at most args.j commands running in parllel """ - # Determine the run dir for each config # We pop off the jobs of queued_jobs, which python does from the end, # so reverse the list now so we get the expected order. 
This also ensures # we are working with a copy of the jobs queued_jobs = list(reversed(queued_jobs)) - # Find the max taskname length for pretty printing queued_procs = [] queue = Manager().Queue() for job in queued_jobs: - queued_procs += [(queue, run_dirs, job, args.script)] - # Queue of currently running subprocesses + queued_procs.append((queue, run_dirs, job, args.script)) + # Queue of currently running subprocesses num_failed = 0 with Pool(processes=args.j) as pool: for proc in queued_procs: @@ -451,15 +476,16 @@ def format_human_readable_memory(num_bytes): return "%.2f GiB" % (num_bytes / (1024 ** 3)) -def run_vtr_flow_process(queue, run_dirs, job, script): +def run_vtr_flow_process(queue, run_dirs, job, script) -> None: """ - This is the function that the multiprocessing calls. - It runs the vtr flow and allerts the multiprocessor through a queue if the flow failed. + This is the function called by multiprocessing.Pool. + It runs the VTR flow and alerts the caller through the queue if the flow failed. 
""" work_dir = job.work_dir(run_dirs[job.task_name()]) Path(work_dir).mkdir(parents=True, exist_ok=True) out = None vtr_flow_out = str(PurePath(work_dir) / "vtr_flow.out") + with open(vtr_flow_out, "w+") as out_file: with redirect_stdout(out_file): if script == "run_vtr_flow.py": diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_bidir/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_bidir/config/golden_results.txt index 5fc8a907e0e..62d595b7939 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_bidir/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_bidir/config/golden_results.txt @@ -1,41 +1,41 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength 
crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - k4_n4_v7_bidir.xml alu4.blif common 15.83 vpr 66.14 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 475 14 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67724 14 8 1536 1544 0 1091 497 24 24 576 clb auto 28.7 MiB 0.39 13942 66.1 MiB 1.06 0.02 14.3013 -98.9257 -14.3013 nan 1.70 0.00346327 0.00289139 0.230893 0.199565 26 21853 45 1.452e+07 1.425e+07 -1 -1 7.05 1.16337 0.99243 19779 20 7332 24540 2677471 250087 17.8805 nan -127.439 -17.8805 0 0 -1 -1 0.78 0.88 0.204904 0.181261 - k4_n4_v7_bidir.xml apex2.blif common 23.15 vpr 69.80 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 600 38 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71472 38 3 1916 1919 0 1509 641 27 27 729 clb auto 32.1 MiB 0.52 19853 69.8 MiB 1.65 0.02 14.54 -43.2406 -14.54 nan 2.20 0.00452267 0.00387307 0.346312 0.296325 30 32859 39 1.875e+07 1.8e+07 -1 -1 11.69 1.49844 1.27385 27714 21 10259 34268 3089635 267226 17.6599 nan -51.7945 -17.6599 0 0 -1 -1 1.03 1.08 0.279537 0.246762 - k4_n4_v7_bidir.xml apex4.blif common 21.42 vpr 64.47 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 408 9 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 
66020 9 19 1271 1290 0 990 436 23 23 529 clb auto 26.7 MiB 0.29 13674 64.5 MiB 0.81 0.01 13.5441 -217.965 -13.5441 nan 1.38 0.00269035 0.00225276 0.175909 0.150396 31 22546 42 1.323e+07 1.224e+07 -1 -1 14.16 1.31427 1.12111 20061 23 8457 31679 3326721 275480 17.9807 nan -271.917 -17.9807 0 0 -1 -1 0.65 0.93 0.197944 0.174647 - k4_n4_v7_bidir.xml bigkey.blif common 22.30 vpr 69.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 456 229 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71500 229 197 2152 2349 1 1587 882 29 29 841 io auto 32.2 MiB 0.38 12538 69.8 MiB 2.49 0.03 7.21536 -1752.91 -7.21536 7.21536 2.67 0.00525031 0.00457969 0.500691 0.435747 18 19948 37 2.187e+07 1.368e+07 -1 -1 9.75 2.11662 1.84808 18176 20 8122 22577 1592601 166640 11.2146 11.2146 -2409.11 -11.2146 0 0 -1 -1 0.74 0.79 0.30778 0.275313 - k4_n4_v7_bidir.xml clma.blif common 234.10 vpr 170.72 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2523 62 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 174816 62 82 8460 8542 1 6360 2667 53 53 2809 clb auto 89.6 MiB 2.25 105968 164.4 MiB 23.34 0.23 27.4331 -1322.53 -27.4331 27.4331 12.24 0.0308254 0.0236989 3.06851 2.37813 40 140123 34 7.803e+07 7.569e+07 -1 -1 158.96 15.1702 12.2176 136089 20 39117 140993 15651359 1264440 31.7989 31.7989 -1630.05 -31.7989 0 0 -1 -1 6.26 5.77 1.57688 1.33296 - k4_n4_v7_bidir.xml des.blif common 25.14 vpr 68.68 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 449 256 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 70328 256 245 1847 2092 0 1443 950 34 34 1156 io auto 31.1 MiB 0.48 16009 68.7 
MiB 2.09 0.03 13.677 -2250.92 -13.677 nan 3.82 0.0051129 0.00449245 0.434618 0.385361 19 27159 49 3.072e+07 1.347e+07 -1 -1 9.27 1.90217 1.68731 21902 21 9383 31021 2553814 254641 15.7393 nan -2823.9 -15.7393 0 0 -1 -1 1.11 0.95 0.323954 0.294406 - k4_n4_v7_bidir.xml diffeq.blif common 21.50 vpr 67.31 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 416 64 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68928 64 39 1935 1974 1 1104 519 23 23 529 clb auto 29.8 MiB 0.39 10529 67.3 MiB 1.08 0.02 10.9681 -2557.3 -10.9681 10.9681 1.49 0.00322534 0.00270723 0.257513 0.218299 24 15464 30 1.323e+07 1.248e+07 -1 -1 13.73 1.90384 1.62521 14273 18 6345 20912 1422376 142071 14.1668 14.1668 -3260.29 -14.1668 0 0 -1 -1 0.56 0.59 0.222919 0.198972 - k4_n4_v7_bidir.xml dsip.blif common 26.28 vpr 66.97 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 390 229 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68576 229 197 1815 2012 1 1190 816 29 29 841 io auto 29.4 MiB 0.40 11690 67.0 MiB 2.12 0.03 6.8611 -1690.21 -6.8611 6.8611 2.68 0.0050109 0.00437603 0.481771 0.424062 20 16981 31 2.187e+07 1.17e+07 -1 -1 14.15 2.78838 2.46208 16401 19 6939 22609 1711858 173518 9.00042 9.00042 -2323.87 -9.00042 0 0 -1 -1 0.81 0.72 0.25883 0.232274 - k4_n4_v7_bidir.xml elliptic.blif common 58.71 vpr 87.12 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 996 131 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89208 131 114 4855 4969 1 2150 1241 34 34 1156 clb auto 50.3 MiB 0.91 31537 87.1 MiB 5.07 0.05 20.2775 -11159.4 -20.2775 20.2775 3.77 0.0114839 0.0102157 1.16151 0.969182 
31 46165 32 3.072e+07 2.988e+07 -1 -1 36.58 5.98533 4.9975 41657 20 11221 49578 4472422 377984 25.2322 25.2322 -14303.6 -25.2322 0 0 -1 -1 1.76 2.02 0.759028 0.664073 - k4_n4_v7_bidir.xml ex1010.blif common 78.62 vpr 101.43 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1500 10 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 103860 10 10 4608 4618 0 3623 1520 41 41 1681 clb auto 56.4 MiB 1.07 45715 96.9 MiB 7.83 0.09 24.1535 -234.623 -24.1535 nan 5.79 0.0126259 0.0101241 1.16696 0.933692 31 64748 22 4.563e+07 4.5e+07 -1 -1 46.28 6.62885 5.38123 63842 19 24184 95794 7338432 673015 28.1535 nan -272.178 -28.1535 0 0 -1 -1 2.61 2.82 0.77112 0.656492 - k4_n4_v7_bidir.xml ex5p.blif common 16.44 vpr 63.26 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 346 8 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64776 8 63 1072 1135 0 907 417 21 21 441 clb auto 25.2 MiB 0.25 11674 63.3 MiB 0.74 0.01 12.156 -529.016 -12.156 nan 1.17 0.0025199 0.00210768 0.164226 0.139948 31 18183 40 1.083e+07 1.038e+07 -1 -1 10.14 1.23969 1.06525 16242 20 7802 27295 2670855 235779 14.2294 nan -670.483 -14.2294 0 0 -1 -1 0.53 0.78 0.167255 0.149409 - k4_n4_v7_bidir.xml frisc.blif common 74.25 vpr 86.33 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1046 20 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 88404 20 116 4445 4561 1 2328 1182 35 35 1225 clb auto 49.6 MiB 1.04 38690 86.3 MiB 4.69 0.06 22.1068 -12381.1 -22.1068 22.1068 4.05 0.0107939 0.00892273 0.93884 0.780659 35 56138 31 3.267e+07 3.138e+07 -1 -1 47.76 4.85605 4.05266 58253 32 18022 79336 18040863 1779016 
31.3536 31.3536 -18068.5 -31.3536 0 0 -1 -1 2.27 5.14 0.88852 0.759392 - k4_n4_v7_bidir.xml misex3.blif common 24.78 vpr 65.42 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 432 14 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66992 14 14 1411 1425 0 1075 460 23 23 529 clb auto 27.7 MiB 0.34 13737 65.4 MiB 0.90 0.01 12.3103 -158.671 -12.3103 nan 1.41 0.00298404 0.00253358 0.20526 0.175635 29 22259 43 1.323e+07 1.296e+07 -1 -1 17.02 1.68043 1.44078 21241 34 9145 30640 4608032 460008 21.1012 nan -254.19 -21.1012 0 0 -1 -1 0.62 1.29 0.294695 0.258387 - k4_n4_v7_bidir.xml pdc.blif common 165.73 vpr 105.43 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1529 16 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 107964 16 40 4591 4631 0 3652 1585 42 42 1764 clb auto 57.8 MiB 1.37 69834 100.9 MiB 8.33 0.09 23.9473 -809.763 -23.9473 nan 6.00 0.013955 0.0109497 1.31786 1.05072 44 101648 40 4.8e+07 4.587e+07 -1 -1 127.75 7.7425 6.35847 95769 22 24754 101600 14856401 1125885 28.5765 nan -985.808 -28.5765 0 0 -1 -1 4.33 4.50 0.900098 0.76374 - k4_n4_v7_bidir.xml s298.blif common 35.07 vpr 70.08 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 569 4 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71760 4 6 1942 1948 1 1189 579 26 26 676 clb auto 32.3 MiB 0.36 13896 70.1 MiB 1.30 0.02 20.7279 -165.383 -20.7279 20.7279 1.89 0.00389191 0.0032475 0.300862 0.256145 24 20608 26 1.728e+07 1.707e+07 -1 -1 25.65 2.54964 2.16906 19579 21 7384 38268 2760226 242824 24.5168 24.5168 -203.276 -24.5168 0 0 -1 -1 0.69 1.00 0.281349 0.246752 - k4_n4_v7_bidir.xml 
s38417.blif common 111.91 vpr 123.41 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1735 29 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 126372 29 106 7534 7640 1 4766 1870 44 44 1936 clb auto 72.6 MiB 1.62 46511 116.3 MiB 11.43 0.12 16.9452 -10160.4 -16.9452 16.9452 7.34 0.0206342 0.016187 2.06405 1.61281 24 62728 32 5.292e+07 5.205e+07 -1 -1 69.73 9.36885 7.51033 59411 23 28324 92709 6398688 623979 20.8766 20.8766 -12726.8 -20.8766 0 0 -1 -1 2.61 3.15 1.29949 1.09371 - k4_n4_v7_bidir.xml s38584.1.blif common 82.18 vpr 120.95 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1647 38 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 123848 38 304 7475 7779 1 4418 1989 43 43 1849 clb auto 71.7 MiB 1.79 43812 113.0 MiB 11.59 0.12 12.3351 -8509.09 -12.3351 12.3351 7.03 0.0229826 0.0177449 2.24079 1.75992 24 56547 28 5.043e+07 4.941e+07 -1 -1 42.06 8.32958 6.80777 54274 16 21610 64448 4240686 438237 14.7928 14.7928 -10283.9 -14.7928 0 0 -1 -1 2.40 2.28 1.10031 0.960036 - k4_n4_v7_bidir.xml seq.blif common 30.13 vpr 68.78 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 539 41 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 70428 41 35 1791 1826 0 1383 615 26 26 676 clb auto 30.8 MiB 0.42 19115 68.8 MiB 1.44 0.02 13.3419 -389.664 -13.3419 nan 1.83 0.00420194 0.00355043 0.30572 0.258225 33 30619 34 1.728e+07 1.617e+07 -1 -1 20.01 2.28401 1.94489 27841 23 10485 35577 4762905 404236 20.6267 nan -540.416 -20.6267 0 0 -1 -1 0.89 1.28 0.282439 0.247772 - k4_n4_v7_bidir.xml spla.blif common 83.58 vpr 87.19 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1232 16 
-1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89280 16 46 3706 3752 0 2880 1294 38 38 1444 clb auto 48.8 MiB 1.13 48254 86.3 MiB 5.60 0.06 19.5643 -647.519 -19.5643 nan 5.13 0.0099415 0.0083197 0.862476 0.721286 39 69193 30 3.888e+07 3.696e+07 -1 -1 50.03 4.84516 4.046 76711 50 24953 105014 26719775 2833440 41.2657 nan -1355.7 -41.2657 0 0 -1 -1 2.93 7.85 1.33119 1.1324 - k4_n4_v7_bidir.xml tseng.blif common 10.88 vpr 63.74 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 279 52 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65268 52 122 1483 1605 1 736 453 19 19 361 clb auto 25.9 MiB 0.26 5998 63.7 MiB 0.74 0.01 9.22752 -2100.26 -9.22752 9.22752 0.94 0.00288372 0.00243945 0.194225 0.167496 20 9811 38 8.67e+06 8.37e+06 -1 -1 5.60 0.967693 0.839724 9832 34 5019 17191 1401826 152941 15.872 15.872 -3145.51 -15.872 0 0 -1 -1 0.28 0.62 0.262287 0.231798 - k4_n4_v7_l1_bidir.xml alu4.blif common 37.81 vpr 66.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 475 14 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68092 14 8 1536 1544 0 1091 497 24 24 576 clb auto 29.0 MiB 0.39 14339 66.5 MiB 1.13 0.02 17.4108 -123.292 -17.4108 nan 2.44 0.00352533 0.00293658 0.249406 0.214591 22 16731 48 1.452e+07 1.425e+07 -1 -1 26.46 1.39196 1.18814 14531 19 6900 26169 1924215 354074 19.2344 nan -137.937 -19.2344 0 0 -1 -1 0.87 0.96 0.219553 0.195464 - k4_n4_v7_l1_bidir.xml apex2.blif common 41.77 vpr 69.95 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 600 38 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 
2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71624 38 3 1916 1919 0 1509 641 27 27 729 clb auto 32.2 MiB 0.47 20080 69.9 MiB 1.66 0.02 19.7058 -57.3847 -19.7058 nan 3.03 0.00439033 0.00363017 0.350518 0.293699 24 22240 39 1.875e+07 1.8e+07 -1 -1 27.34 1.6676 1.41632 19938 15 8998 31953 2812823 403957 21.1399 nan -61.3992 -21.1399 0 0 -1 -1 1.18 1.10 0.237916 0.209976 - k4_n4_v7_l1_bidir.xml apex4.blif common 55.89 vpr 64.65 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 408 9 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 66204 9 19 1271 1290 0 990 436 23 23 529 clb auto 26.9 MiB 0.32 13794 64.7 MiB 1.00 0.01 16.3098 -260.926 -16.3098 nan 2.20 0.00284875 0.00240277 0.216312 0.1863 26 17279 43 1.323e+07 1.224e+07 -1 -1 44.86 1.65595 1.4134 14673 23 8400 30128 4852239 868729 19.5281 nan -302.082 -19.5281 0 0 -1 -1 0.93 1.57 0.18678 0.165508 - k4_n4_v7_l1_bidir.xml bigkey.blif common 59.79 vpr 69.80 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 456 229 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71476 229 197 2152 2349 1 1587 882 29 29 841 io auto 32.2 MiB 0.41 12777 69.8 MiB 2.57 0.03 10.262 -2329.89 -10.262 10.262 4.01 0.00446688 0.00389652 0.526181 0.459068 12 12525 35 2.187e+07 1.368e+07 -1 -1 41.83 1.98702 1.7375 11501 13 7601 22043 1133327 218456 11.4168 11.4168 -2677.25 -11.4168 0 0 -1 -1 0.77 0.69 0.232388 0.209825 - k4_n4_v7_l1_bidir.xml clma.blif common 387.06 vpr 229.14 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2523 62 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 
/root/vtr-verilog-to-routing/vtr-verilog-to-routing 234636 62 82 8460 8542 1 6360 2667 53 53 2809 clb auto 89.8 MiB 2.14 106192 215.1 MiB 23.01 0.21 36.3251 -1893.34 -36.3251 36.3251 15.65 0.0276743 0.0214768 2.97814 2.30531 34 109946 49 7.803e+07 7.569e+07 -1 -1 295.03 16.6517 13.4013 101036 15 38135 142858 14510650 2201358 39.0889 39.0889 -2125.06 -39.0889 0 0 -1 -1 8.27 6.42 1.30671 1.11208 - k4_n4_v7_l1_bidir.xml des.blif common 52.49 vpr 83.02 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 449 256 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 85008 256 245 1847 2092 0 1443 950 34 34 1156 io auto 31.1 MiB 0.46 16206 83.0 MiB 2.34 0.03 15.8963 -2848.43 -15.8963 nan 5.85 0.00569889 0.00499205 0.513718 0.455215 14 16731 44 3.072e+07 1.347e+07 -1 -1 29.52 2.13288 1.89721 15602 15 8732 27561 2020539 334580 17.3373 nan -3126.27 -17.3373 0 0 -1 -1 1.24 0.91 0.259974 0.237289 - k4_n4_v7_l1_bidir.xml diffeq.blif common 23.03 vpr 66.89 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 416 64 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68500 64 39 1935 1974 1 1104 519 23 23 529 clb auto 29.4 MiB 0.35 10322 66.9 MiB 1.14 0.02 12.7328 -2801.17 -12.7328 12.7328 2.03 0.0037657 0.00320518 0.296288 0.254271 17 11492 33 1.323e+07 1.248e+07 -1 -1 13.45 1.35561 1.16148 9955 18 6800 22452 1979011 369474 13.6275 13.6275 -3211.75 -13.6275 0 0 -1 -1 0.55 0.81 0.225084 0.200362 - k4_n4_v7_l1_bidir.xml dsip.blif common 74.46 vpr 67.36 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 390 229 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68980 229 197 1815 
2012 1 1190 816 29 29 841 io auto 29.7 MiB 0.35 11604 67.4 MiB 2.08 0.04 9.42411 -2261.74 -9.42411 9.42411 3.68 0.00445062 0.00392448 0.468754 0.413106 12 11955 41 2.187e+07 1.17e+07 -1 -1 59.34 2.17216 1.91547 10643 15 6086 20204 1075921 204685 9.94517 9.94517 -2532.48 -9.94517 0 0 -1 -1 0.70 0.61 0.208662 0.188192 - k4_n4_v7_l1_bidir.xml elliptic.blif common 291.45 vpr 96.19 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 996 131 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 98500 131 114 4855 4969 1 2150 1241 34 34 1156 clb auto 50.1 MiB 0.87 32010 95.5 MiB 4.71 0.05 24.9388 -14312.7 -24.9388 24.9388 5.41 0.0112566 0.00932201 1.06129 0.881633 24 35036 37 3.072e+07 2.988e+07 -1 -1 263.09 5.95933 4.95944 31073 16 11649 51210 5097989 770491 26.1994 26.1994 -16217.8 -26.1994 0 0 -1 -1 2.02 2.28 0.598875 0.520244 - k4_n4_v7_l1_bidir.xml ex1010.blif common 116.87 vpr 130.09 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1500 10 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 133216 10 10 4608 4618 0 3623 1520 41 41 1681 clb auto 56.6 MiB 1.20 45417 129.0 MiB 8.38 0.10 31.5928 -307.382 -31.5928 nan 9.46 0.0133068 0.0108108 1.27602 1.03 23 50766 50 4.563e+07 4.5e+07 -1 -1 68.85 7.15812 5.88807 46097 18 27457 108175 9261642 1654533 34.8237 nan -338.947 -34.8237 0 0 -1 -1 3.18 4.33 0.798244 0.683082 - k4_n4_v7_l1_bidir.xml ex5p.blif common 58.98 vpr 63.25 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 346 8 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64772 8 63 1072 1135 0 907 417 21 21 441 clb auto 25.2 MiB 0.24 11707 63.3 MiB 0.78 0.01 
14.0138 -635.593 -14.0138 nan 1.62 0.00225076 0.00190238 0.170507 0.146058 24 14005 44 1.083e+07 1.038e+07 -1 -1 51.12 0.998894 0.856579 12021 18 7862 27162 2532878 379751 14.9959 nan -701.913 -14.9959 0 0 -1 -1 0.64 0.84 0.138605 0.123979 - k4_n4_v7_l1_bidir.xml frisc.blif common 294.46 vpr 98.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1046 20 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 100820 20 116 4445 4561 1 2328 1182 35 35 1225 clb auto 49.6 MiB 1.03 38128 98.5 MiB 5.35 0.06 24.6071 -14255.3 -24.6071 24.6071 6.21 0.010811 0.00882732 1.08897 0.880865 28 41843 32 3.267e+07 3.138e+07 -1 -1 261.66 5.57964 4.62948 38402 17 14687 66609 7395387 1236933 26.1498 26.1498 -16183.6 -26.1498 0 0 -1 -1 2.62 3.09 0.641543 0.552798 - k4_n4_v7_l1_bidir.xml misex3.blif common 45.42 vpr 65.48 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 432 14 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67052 14 14 1411 1425 0 1075 460 23 23 529 clb auto 27.8 MiB 0.34 13652 65.5 MiB 0.92 0.01 15.3399 -193.978 -15.3399 nan 2.05 0.00255059 0.00214863 0.194132 0.165695 24 14890 31 1.323e+07 1.296e+07 -1 -1 35.99 1.52646 1.3056 13697 16 7031 25944 2067091 329492 16.2598 nan -209.259 -16.2598 0 0 -1 -1 0.82 0.83 0.167296 0.149142 - k4_n4_v7_l1_bidir.xml pdc.blif common 351.99 vpr 144.71 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1529 16 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 148184 16 40 4591 4631 0 3652 1585 42 42 1764 clb auto 57.9 MiB 1.34 70960 133.9 MiB 8.63 0.08 30.2979 -1034.97 -30.2979 nan 8.96 0.0135504 0.0108423 1.34787 1.09998 38 
85927 39 4.8e+07 4.587e+07 -1 -1 296.36 8.87781 7.24709 74779 20 26275 107439 27708692 6169118 32.9609 nan -1147.04 -32.9609 0 0 -1 -1 5.64 9.92 0.882731 0.745583 - k4_n4_v7_l1_bidir.xml s298.blif common 25.98 vpr 69.81 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 569 4 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71488 4 6 1942 1948 1 1189 579 26 26 676 clb auto 32.0 MiB 0.37 13691 69.8 MiB 1.40 0.02 23.7994 -183.152 -23.7994 23.7994 2.84 0.00422585 0.00357011 0.332188 0.28312 17 15036 32 1.728e+07 1.707e+07 -1 -1 13.08 1.4762 1.25395 13994 18 8431 41065 3419771 506073 25.9166 25.9166 -208.225 -25.9166 0 0 -1 -1 0.73 1.28 0.254819 0.224708 - k4_n4_v7_l1_bidir.xml s38417.blif common 119.42 vpr 150.75 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1735 29 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 154372 29 106 7534 7640 1 4766 1870 44 44 1936 clb auto 72.4 MiB 1.43 46455 150.8 MiB 10.66 0.10 24.0997 -13127.2 -24.0997 24.0997 9.83 0.0173155 0.0135527 1.88648 1.49661 18 43755 41 5.292e+07 5.205e+07 -1 -1 68.84 7.89494 6.41087 39949 16 23269 73386 4307458 765621 25.5158 25.5158 -15320.9 -25.5158 0 0 -1 -1 2.67 2.55 0.992678 0.843564 - k4_n4_v7_l1_bidir.xml s38584.1.blif common 77.45 vpr 146.06 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1647 38 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 149568 38 304 7475 7779 1 4418 1989 43 43 1849 clb auto 71.8 MiB 1.62 44099 146.1 MiB 10.49 0.18 16.9276 -11315.8 -16.9276 16.9276 8.97 0.0179745 0.0147437 1.90827 1.5447 19 43478 45 5.043e+07 4.941e+07 -1 -1 29.43 7.16 5.93978 37721 12 
20029 61320 3570939 616978 17.3252 17.3252 -12579 -17.3252 0 0 -1 -1 2.53 2.05 0.816079 0.709693 - k4_n4_v7_l1_bidir.xml seq.blif common 100.35 vpr 68.76 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 539 41 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 70412 41 35 1791 1826 0 1383 615 26 26 676 clb auto 30.8 MiB 0.41 18206 68.8 MiB 1.62 0.02 16.1287 -488.943 -16.1287 nan 2.76 0.00408409 0.00341885 0.347706 0.296809 24 20747 34 1.728e+07 1.617e+07 -1 -1 87.50 2.00024 1.70629 18150 14 8667 31199 2709352 407078 18.8185 nan -548.896 -18.8185 0 0 -1 -1 1.08 1.03 0.206614 0.184575 - k4_n4_v7_l1_bidir.xml spla.blif common 280.38 vpr 113.89 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1232 16 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 116624 16 46 3706 3752 0 2880 1294 38 38 1444 clb auto 48.7 MiB 1.07 48447 113.9 MiB 5.85 0.06 26.0279 -876.886 -26.0279 nan 7.24 0.00947702 0.00785986 0.991166 0.804458 32 52729 43 3.888e+07 3.696e+07 -1 -1 241.57 5.43955 4.49834 50648 23 21722 92223 14909522 2641834 28.7306 nan -1008.44 -28.7306 0 0 -1 -1 3.54 5.45 0.726388 0.620426 - k4_n4_v7_l1_bidir.xml tseng.blif common 12.05 vpr 63.79 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 279 52 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 65320 52 122 1483 1605 1 736 453 19 19 361 clb auto 26.0 MiB 0.21 5885 63.8 MiB 0.69 0.01 9.71655 -2503.73 -9.71655 9.71655 1.21 0.00263139 0.00220299 0.179159 0.152884 15 6962 36 8.67e+06 8.37e+06 -1 -1 6.32 1.12812 0.970812 5511 18 4332 15214 701482 135162 10.6732 10.6732 -2953.15 -10.6732 0 0 -1 -1 0.31 0.39 
0.156168 0.139122 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes 
crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k4_n4_v7_bidir.xml alu4.blif common 17.97 vpr 69.13 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 475 14 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 70788 14 8 1536 1544 0 1091 497 24 24 576 clb auto 31.6 MiB 0.29 14174 69.1 MiB 1.05 0.01 13.4464 -91.906 -13.4464 nan 1.36 0.00347385 0.00290782 0.247999 0.211234 28 20910 32 1.452e+07 1.425e+07 -1 -1 10.82 1.44391 1.22437 21174 279108 -1 19878 20 7201 27995 2276505 212795 0 0 2276505 212795 16951 11554 0 0 31392 28016 0 0 50562 32519 0 0 53034 24138 0 0 1089394 57817 0 0 1035172 58751 0 0 
16951 0 0 12554 113703 115472 357504 11933 2267 16.2487 nan -109.749 -16.2487 0 0 -1 -1 0.57 0.72 0.17 -1 -1 0.57 0.18904 0.168713 +k4_n4_v7_bidir.xml apex2.blif common 22.29 vpr 72.88 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 600 38 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 74632 38 3 1916 1919 0 1509 641 27 27 729 clb auto 35.0 MiB 0.38 19839 72.9 MiB 1.44 0.02 14.9286 -44.0658 -14.9286 nan 1.71 0.00447423 0.0037738 0.318034 0.271255 31 29152 43 1.875e+07 1.8e+07 -1 -1 13.08 1.89132 1.61731 28210 394495 -1 28088 18 10308 35327 3215747 279851 0 0 3215747 279851 29720 16267 0 0 39742 35335 0 0 61341 40948 0 0 80107 33828 0 0 1543669 76168 0 0 1461168 77305 0 0 29720 0 0 24742 194098 209672 870568 6388 201 17.3073 nan -51.5022 -17.3073 0 0 -1 -1 0.80 0.88 0.22 -1 -1 0.80 0.204316 0.178519 +k4_n4_v7_bidir.xml apex4.blif common 20.47 vpr 67.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 408 9 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68812 9 19 1271 1290 0 990 436 23 23 529 clb auto 29.6 MiB 0.24 13522 67.2 MiB 0.88 0.01 12.9459 -210.249 -12.9459 nan 1.31 0.00304529 0.00263188 0.202833 0.176455 31 21733 44 1.323e+07 1.224e+07 -1 -1 13.75 1.2743 1.09421 20514 283063 -1 19523 24 8011 29398 3111419 256159 0 0 3111419 256159 27108 14933 0 0 33129 29452 0 0 53736 33902 0 0 81514 31763 0 0 1464504 74767 0 0 1451428 71342 0 0 27108 0 0 31372 225582 235236 1191218 2710 504 16.6567 nan -264.732 -16.6567 0 0 -1 -1 0.57 0.82 0.17 -1 -1 0.57 0.173296 0.153258 +k4_n4_v7_bidir.xml bigkey.blif common 26.60 vpr 73.27 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 456 229 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 
gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 75028 229 197 2152 2349 1 1587 882 29 29 841 io auto 35.1 MiB 0.30 12959 73.3 MiB 2.51 0.02 7.48553 -1803.94 -7.48553 7.48553 2.28 0.00469204 0.00410364 0.51071 0.442793 18 20371 48 2.187e+07 1.368e+07 -1 -1 15.57 1.94898 1.6994 25794 279159 -1 18368 19 8448 24780 1743257 182995 0 0 1743257 182995 13766 10049 0 0 30505 25889 0 0 47823 31434 0 0 40964 21410 0 0 806666 46627 0 0 803533 47586 0 0 13766 0 0 6197 80865 80423 213680 11837 3693 9.06144 9.06144 -2390.66 -9.06144 0 0 -1 -1 0.61 0.68 0.17 -1 -1 0.61 0.253486 0.225627 +k4_n4_v7_bidir.xml clma.blif common 142.35 vpr 187.99 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2523 62 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 192504 62 82 8460 8542 1 6360 2667 53 53 2809 clb auto 92.3 MiB 1.80 106462 171.3 MiB 18.59 0.16 27.3694 -1405.65 -27.3694 27.3694 9.76 0.0244187 0.0196415 2.5024 2.05173 39 139434 27 7.803e+07 7.569e+07 -1 -1 76.80 10.1302 8.39795 121914 1953961 -1 144525 31 49683 171853 40636067 3446563 0 0 40636067 3446563 131588 83133 0 0 195439 172145 0 0 321140 204566 0 0 417577 203354 0 0 19358442 1426844 0 0 20211881 1356521 0 0 131588 0 0 119534 1007982 997442 3452968 44789 50391 35.3515 35.3515 -1874.87 -35.3515 0 0 -1 -1 5.12 10.08 1.18 -1 -1 5.12 1.68323 1.4069 +k4_n4_v7_bidir.xml des.blif common 23.62 vpr 71.27 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 449 256 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 72980 256 245 1847 2092 0 1443 950 34 34 1156 io auto 33.6 MiB 0.35 16116 71.3 MiB 1.98 0.03 12.1555 -2310.02 -12.1555 nan 3.25 0.00544401 0.0047579 0.394496 0.349342 20 23620 43 3.072e+07 1.347e+07 -1 -1 9.87 
1.76085 1.56242 36518 419916 -1 22263 23 10124 34066 3025249 296853 0 0 3025249 296853 31691 18908 0 0 39976 35072 0 0 66141 41206 0 0 79559 38882 0 0 1378551 82326 0 0 1429331 80459 0 0 31691 0 0 27667 129057 127151 621415 3326 4 15.4638 nan -2935.27 -15.4638 0 0 -1 -1 0.98 0.93 0.28 -1 -1 0.98 0.281328 0.254279 +k4_n4_v7_bidir.xml diffeq.blif common 18.57 vpr 70.11 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 416 64 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71796 64 39 1935 1974 1 1104 519 23 23 529 clb auto 32.6 MiB 0.29 10612 70.1 MiB 1.04 0.02 11.2136 -2465.35 -11.2136 11.2136 1.32 0.00454802 0.00396332 0.310543 0.268356 24 15679 28 1.323e+07 1.248e+07 -1 -1 11.93 1.60313 1.37816 18402 227975 -1 14539 21 6424 21196 1456215 145994 0 0 1456215 145994 17973 9193 0 0 24483 21325 0 0 39605 25276 0 0 51419 20779 0 0 650016 35238 0 0 672719 34183 0 0 17973 0 0 17677 75015 74106 385162 3943 1788 15.6994 15.6994 -3159.95 -15.6994 0 0 -1 -1 0.45 0.56 0.13 -1 -1 0.45 0.220429 0.194677 +k4_n4_v7_bidir.xml dsip.blif common 17.61 vpr 69.86 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 390 229 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71540 229 197 1815 2012 1 1190 816 29 29 841 io auto 32.2 MiB 0.29 11338 69.9 MiB 2.10 0.03 6.78424 -1682.3 -6.78424 6.78424 2.26 0.00461262 0.0039929 0.459096 0.396794 18 17527 41 2.187e+07 1.17e+07 -1 -1 7.42 1.81094 1.58864 25794 279159 -1 15395 15 6452 19926 1299311 140436 0 0 1299311 140436 12157 7470 0 0 24890 21082 0 0 37977 25483 0 0 31685 15748 0 0 605396 34414 0 0 587206 36239 0 0 12157 0 0 6413 50387 50201 110246 8260 1780 9.01728 9.01728 -2205.55 -9.01728 0 0 -1 -1 0.60 0.50 0.17 -1 -1 0.60 0.18749 0.167586 +k4_n4_v7_bidir.xml 
elliptic.blif common 69.11 vpr 89.86 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 996 131 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 92020 131 114 4855 4969 1 2150 1241 34 34 1156 clb auto 53.1 MiB 0.73 31456 89.9 MiB 4.26 0.05 18.9025 -10909.3 -18.9025 18.9025 3.29 0.0101721 0.00908163 0.968685 0.817533 30 52978 48 3.072e+07 2.988e+07 -1 -1 50.43 5.12544 4.34518 44604 633776 -1 42011 19 11537 51254 4572824 386879 0 0 4572824 386879 38004 16059 0 0 59624 51865 0 0 94572 61167 0 0 101837 34763 0 0 2152730 110268 0 0 2126057 112757 0 0 38004 0 0 43133 327755 335053 1351042 15185 11666 23.0444 23.0444 -14356.4 -23.0444 0 0 -1 -1 1.47 1.62 0.38 -1 -1 1.47 0.538091 0.473642 +k4_n4_v7_bidir.xml ex1010.blif common 71.35 vpr 111.18 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1500 10 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 113844 10 10 4608 4618 0 3623 1520 41 41 1681 clb auto 59.5 MiB 0.95 45872 101.7 MiB 7.25 0.08 24.5792 -235.267 -24.5792 nan 5.30 0.0111252 0.00889648 1.06433 0.869968 31 65695 22 4.563e+07 4.5e+07 -1 -1 41.62 5.2262 4.28588 64722 929407 -1 64534 20 24936 100574 7806517 709051 0 0 7806517 709051 60509 37981 0 0 114508 100614 0 0 184637 118813 0 0 184407 78252 0 0 3667667 182901 0 0 3594789 190490 0 0 60509 0 0 44495 471267 474245 1414540 43464 21867 27.7913 nan -270.144 -27.7913 0 0 -1 -1 2.35 2.58 0.64 -1 -1 2.35 0.591769 0.508861 +k4_n4_v7_bidir.xml ex5p.blif common 15.39 vpr 65.90 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 346 8 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67484 8 63 1072 1135 0 
907 417 21 21 441 clb auto 28.1 MiB 0.21 11634 65.9 MiB 0.73 0.01 11.978 -548.759 -11.978 nan 1.07 0.00292061 0.00254546 0.181734 0.159563 32 17719 34 1.083e+07 1.038e+07 -1 -1 9.16 1.16136 1.01254 17562 246361 -1 18739 28 9438 30715 5369320 511376 0 0 5369320 511376 28940 20285 0 0 35234 31047 0 0 55404 36228 0 0 99566 49748 0 0 2568030 187880 0 0 2582146 186188 0 0 28940 0 0 31517 145037 153285 763874 1914 39 21.5878 nan -899.15 -21.5878 0 0 -1 -1 0.48 1.20 0.14 -1 -1 0.48 0.182491 0.160903 +k4_n4_v7_bidir.xml frisc.blif common 57.61 vpr 91.26 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1046 20 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 93448 20 116 4445 4561 1 2328 1182 35 35 1225 clb auto 52.4 MiB 0.85 38585 89.7 MiB 4.32 0.05 22.1146 -12365.8 -22.1146 22.1146 3.54 0.0115452 0.00973288 0.969395 0.812984 35 57472 38 3.267e+07 3.138e+07 -1 -1 35.21 4.54213 3.84136 50922 772933 -1 56598 25 17222 77259 15811791 1477237 0 0 15811791 1477237 63030 30948 0 0 89154 78174 0 0 147213 91749 0 0 172679 78915 0 0 7527610 607462 0 0 7812105 589989 0 0 63030 0 0 62103 410569 411653 1666140 15685 11675 32.2679 32.2679 -18125.6 -32.2679 0 0 -1 -1 1.83 3.92 0.47 -1 -1 1.83 0.668168 0.583883 +k4_n4_v7_bidir.xml misex3.blif common 18.76 vpr 68.10 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 432 14 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 69736 14 14 1411 1425 0 1075 460 23 23 529 clb auto 30.4 MiB 0.32 13609 68.1 MiB 0.96 0.01 12.4424 -149.071 -12.4424 nan 1.35 0.00328638 0.00288741 0.228776 0.196418 29 22322 42 1.323e+07 1.296e+07 -1 -1 11.52 1.19383 1.01742 19986 270173 -1 20348 28 8859 29487 3869933 348873 0 0 3869933 348873 23845 15825 0 0 33114 29526 0 0 53651 34352 0 0 
74663 38033 0 0 1876058 113330 0 0 1808602 117807 0 0 23845 0 0 20205 114520 122449 437755 6445 279 16.7203 nan -203.293 -16.7203 0 0 -1 -1 0.55 0.97 0.17 -1 -1 0.55 0.197335 0.171467 +k4_n4_v7_bidir.xml pdc.blif common 130.05 vpr 116.83 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1529 16 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 119632 16 40 4591 4631 0 3652 1585 42 42 1764 clb auto 60.8 MiB 1.24 69136 105.4 MiB 7.51 0.08 21.6179 -743.475 -21.6179 nan 5.52 0.0128612 0.0103429 1.12296 0.911833 44 100825 34 4.8e+07 4.587e+07 -1 -1 95.65 5.44737 4.46483 83766 1407084 -1 96776 21 25984 111801 17151035 1289269 0 0 17151035 1289269 81490 43452 0 0 126633 111956 0 0 217767 132486 0 0 235627 91401 0 0 8216199 458881 0 0 8273319 451093 0 0 81490 0 0 81080 838833 866443 2851501 34177 15307 25.8078 nan -905.706 -25.8078 0 0 -1 -1 3.58 4.36 0.96 -1 -1 3.58 0.64678 0.549082 +k4_n4_v7_bidir.xml s298.blif common 23.68 vpr 72.97 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 569 4 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 74720 4 6 1942 1948 1 1189 579 26 26 676 clb auto 35.2 MiB 0.32 13902 73.0 MiB 1.25 0.02 21.2653 -159.337 -21.2653 21.2653 1.70 0.00404275 0.00347083 0.296835 0.256916 28 19709 22 1.728e+07 1.707e+07 -1 -1 14.84 2.01907 1.72455 24822 329400 -1 19479 18 6634 35892 2626126 230557 0 0 2626126 230557 17008 10168 0 0 40943 35962 0 0 66535 42234 0 0 57260 21984 0 0 1227379 60688 0 0 1217001 59521 0 0 17008 0 0 21198 263909 258893 1051566 21162 19368 25.4875 25.4875 -197.762 -25.4875 0 0 -1 -1 0.73 0.88 0.22 -1 -1 0.73 0.235005 0.206912 +k4_n4_v7_bidir.xml s38417.blif common 88.72 vpr 128.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1735 29 -1 -1 success a1966c4-dirty 
release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 131864 29 106 7534 7640 1 4766 1870 44 44 1936 clb auto 75.3 MiB 1.27 47115 120.9 MiB 10.49 0.11 18.2412 -10727.7 -18.2412 18.2412 6.18 0.0192554 0.0154887 1.84109 1.49575 24 61704 39 5.292e+07 5.205e+07 -1 -1 53.07 7.46909 6.1302 66744 864380 -1 58905 18 26720 83763 5378251 555913 0 0 5378251 555913 64683 36591 0 0 97467 84195 0 0 156057 101739 0 0 183892 79757 0 0 2433961 124956 0 0 2442191 128675 0 0 64683 0 0 45122 234304 228759 879435 20330 25528 21.4718 21.4718 -13465.3 -21.4718 0 0 -1 -1 2.17 2.21 0.58 -1 -1 2.17 0.829587 0.707689 +k4_n4_v7_bidir.xml s38584.1.blif common 77.28 vpr 126.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1647 38 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 129316 38 304 7475 7779 1 4418 1989 43 43 1849 clb auto 74.6 MiB 1.38 45240 117.8 MiB 10.45 0.13 11.9263 -8962.66 -11.9263 11.9263 5.89 0.0184976 0.0147338 1.85878 1.51041 24 60919 48 5.043e+07 4.941e+07 -1 -1 42.74 6.636 5.48201 63762 824815 -1 56541 19 22387 66663 4809398 478500 0 0 4809398 478500 59252 29333 0 0 78504 67676 0 0 120843 81329 0 0 155160 66235 0 0 2228270 113561 0 0 2167369 120366 0 0 59252 0 0 44499 186100 192412 1025254 7854 11555 13.7507 13.7507 -10930.9 -13.7507 0 0 -1 -1 2.06 2.13 0.55 -1 -1 2.06 0.921664 0.803959 +k4_n4_v7_bidir.xml seq.blif common 28.17 vpr 71.55 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 539 41 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 73264 41 35 1791 1826 0 1383 615 26 26 676 clb auto 33.7 MiB 0.37 18103 71.5 MiB 1.51 0.02 14.1718 -400.404 -14.1718 nan 1.77 
0.00447893 0.0037976 0.340345 0.291858 30 31057 47 1.728e+07 1.617e+07 -1 -1 19.01 1.73627 1.48359 26172 364912 -1 25380 16 8985 30629 2861962 250537 0 0 2861962 250537 25106 13969 0 0 34884 30763 0 0 54020 35946 0 0 69763 29606 0 0 1389883 68765 0 0 1288306 71488 0 0 25106 0 0 22096 153117 158712 646194 6097 806 16.758 nan -485.01 -16.758 0 0 -1 -1 0.78 0.80 0.23 -1 -1 0.78 0.188967 0.168217 +k4_n4_v7_bidir.xml spla.blif common 73.80 vpr 97.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1232 16 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 99532 16 46 3706 3752 0 2880 1294 38 38 1444 clb auto 51.7 MiB 0.91 48512 89.3 MiB 4.85 0.05 19.8708 -663.452 -19.8708 nan 4.31 0.00929601 0.00722443 0.787301 0.650829 39 72151 32 3.888e+07 3.696e+07 -1 -1 45.77 3.98785 3.3423 62858 992060 -1 77542 37 25589 107341 26668651 2627948 0 0 26668651 2627948 77722 50767 0 0 121306 107522 0 0 198387 125323 0 0 254326 130878 0 0 12808265 1118501 0 0 13208645 1094957 0 0 77722 0 0 83595 688469 708121 2254809 35056 6239 34.9081 nan -1115.52 -34.9081 0 0 -1 -1 2.46 6.22 0.65 -1 -1 2.46 0.751 0.637103 +k4_n4_v7_bidir.xml tseng.blif common 8.40 vpr 66.36 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 279 52 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67948 52 122 1483 1605 1 736 453 19 19 361 clb auto 28.6 MiB 0.20 6088 66.4 MiB 0.76 0.01 10.2937 -2093.39 -10.2937 10.2937 0.83 0.00332237 0.00290241 0.223239 0.195939 20 9957 42 8.67e+06 8.37e+06 -1 -1 3.73 0.787509 0.686428 11514 125901 -1 9990 29 5011 16576 1439513 162557 0 0 1439513 162557 14011 8384 0 0 19532 16993 0 0 30030 20081 0 0 41245 20221 0 0 654871 49002 0 0 679824 47876 0 0 14011 0 0 11034 35108 33894 139846 2941 1247 19.7201 19.7201 
-3163.4 -19.7201 0 0 -1 -1 0.24 0.53 0.08 -1 -1 0.24 0.208626 0.184136 +k4_n4_v7_l1_bidir.xml alu4.blif common 34.84 vpr 69.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 475 14 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 70948 14 8 1536 1544 0 1091 497 24 24 576 clb auto 31.7 MiB 0.31 14281 69.3 MiB 1.07 0.02 17.405 -119.79 -17.405 nan 2.13 0.0045875 0.00406708 0.262534 0.225911 22 16271 40 1.452e+07 1.425e+07 -1 -1 25.15 1.22173 1.04401 39160 271852 -1 14226 15 6997 28306 1931113 329186 0 0 1931113 329186 15346 8872 0 0 32065 28332 0 0 61791 32252 0 0 43171 17605 0 0 886288 121135 0 0 892452 120990 0 0 15346 0 0 9226 213153 228005 423480 13610 9293 18.4209 nan -129.929 -18.4209 0 0 -1 -1 0.74 0.76 0.20 -1 -1 0.74 0.153264 0.137365 +k4_n4_v7_l1_bidir.xml apex2.blif common 102.31 vpr 72.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 600 38 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 74564 38 3 1916 1919 0 1509 641 27 27 729 clb auto 35.0 MiB 0.43 20018 72.8 MiB 1.71 0.02 19.526 -55.2387 -19.526 nan 2.79 0.00488424 0.0041277 0.383636 0.325401 24 22118 39 1.875e+07 1.8e+07 -1 -1 89.14 2.04687 1.72825 55250 396047 -1 19775 14 8986 31316 2736987 394561 0 0 2736987 394561 25265 11830 0 0 35499 31329 0 0 67573 35812 0 0 60942 23450 0 0 1282512 142192 0 0 1265196 149948 0 0 25265 0 0 17919 420247 467927 1194412 6824 1654 20.5616 nan -58.2176 -20.5616 0 0 -1 -1 1.11 0.97 0.30 -1 -1 1.11 0.18625 0.16592 +k4_n4_v7_l1_bidir.xml apex4.blif common 89.84 vpr 67.37 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 408 9 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 
/root/vtr-verilog-to-routing/vtr-verilog-to-routing 68984 9 19 1271 1290 0 990 436 23 23 529 clb auto 29.8 MiB 0.25 13467 67.4 MiB 0.99 0.01 17.1168 -270.116 -17.1168 nan 1.91 0.00297691 0.00256524 0.224708 0.194607 24 16302 38 1.323e+07 1.224e+07 -1 -1 80.97 1.22647 1.05141 39522 283015 -1 13889 16 7146 27459 2781985 364765 0 0 2781985 364765 24039 10954 0 0 31163 27550 0 0 62028 31415 0 0 63511 23479 0 0 1321089 134334 0 0 1280155 137033 0 0 24039 0 0 23482 526544 527019 1611977 3644 1238 18.3367 nan -298.806 -18.3367 0 0 -1 -1 0.74 0.83 0.19 -1 -1 0.74 0.129661 0.115195 +k4_n4_v7_l1_bidir.xml bigkey.blif common 22.67 vpr 73.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 456 229 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 75044 229 197 2152 2349 1 1587 882 29 29 841 io auto 35.2 MiB 0.30 12931 73.3 MiB 2.67 0.03 11.5134 -2580.62 -11.5134 11.5134 3.37 0.00552349 0.00477065 0.582477 0.505666 13 12765 29 2.187e+07 1.368e+07 -1 -1 8.20 1.567 1.36759 39906 235943 -1 11969 18 7124 21299 1153521 218194 0 0 1153521 218194 11501 7598 0 0 26289 22170 0 0 45758 26441 0 0 31205 14323 0 0 515445 74325 0 0 523323 73337 0 0 11501 0 0 4674 117311 119848 154219 10522 10930 12.056 12.056 -2911.61 -12.056 0 0 -1 -1 0.68 0.64 0.18 -1 -1 0.68 0.225189 0.201027 +k4_n4_v7_l1_bidir.xml clma.blif common 573.90 vpr 233.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2523 62 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 238796 62 82 8460 8542 1 6360 2667 53 53 2809 clb auto 92.3 MiB 1.80 104583 221.5 MiB 18.47 0.16 40.1845 -1767.17 -40.1845 40.1845 13.18 0.0231544 0.0187544 2.44967 1.99032 32 106308 31 7.803e+07 7.569e+07 -1 -1 496.36 10.7853 8.79574 274482 2081397 -1 101996 16 40594 151795 
23814315 4432967 0 0 23814315 4432967 100773 51320 0 0 172148 152050 0 0 339425 174315 0 0 268766 110124 0 0 11536818 1957698 0 0 11396385 1987460 0 0 100773 0 0 70259 2317122 2306531 4801468 54291 155481 42.9009 42.9009 -2145.85 -42.9009 0 0 -1 -1 6.67 7.82 1.47 -1 -1 6.67 0.969179 0.826732 +k4_n4_v7_l1_bidir.xml des.blif common 53.11 vpr 87.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 449 256 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 89384 256 245 1847 2092 0 1443 950 34 34 1156 io auto 33.8 MiB 0.36 16346 87.3 MiB 2.35 0.02 19.6565 -2858.74 -19.6565 nan 4.77 0.00535069 0.00476587 0.514918 0.457842 14 16500 26 3.072e+07 1.347e+07 -1 -1 34.11 1.87913 1.68679 59520 369080 -1 15478 12 7561 24058 1864200 313580 0 0 1864200 313580 22278 10629 0 0 29001 24933 0 0 53680 29288 0 0 48539 20774 0 0 861985 116664 0 0 848717 111292 0 0 22278 0 0 15933 244240 250313 743674 2444 210 21.4109 nan -3191.29 -21.4109 0 0 -1 -1 1.08 0.68 0.28 -1 -1 1.08 0.174295 0.159883 +k4_n4_v7_l1_bidir.xml diffeq.blif common 24.03 vpr 69.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 416 64 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 71604 64 39 1935 1974 1 1104 519 23 23 529 clb auto 32.4 MiB 0.30 10465 69.9 MiB 1.12 0.02 11.8225 -2870.74 -11.8225 11.8225 1.92 0.0043116 0.00367468 0.319898 0.275666 17 11085 37 1.323e+07 1.248e+07 -1 -1 15.27 1.2403 1.06517 30282 197837 -1 10169 18 7282 25044 2184183 406188 0 0 2184183 406188 20367 11188 0 0 28555 25222 0 0 52752 28791 0 0 58766 24448 0 0 1014502 158471 0 0 1009241 158068 0 0 20367 0 0 17479 229597 223773 691660 5435 9780 12.8449 12.8449 -3357.9 -12.8449 0 0 -1 -1 0.48 0.75 0.13 -1 -1 0.48 0.182139 0.159281 +k4_n4_v7_l1_bidir.xml dsip.blif 
common 25.48 vpr 70.76 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 390 229 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 72456 229 197 1815 2012 1 1190 816 29 29 841 io auto 32.5 MiB 0.30 11724 70.8 MiB 2.16 0.02 9.84842 -2315.87 -9.84842 9.84842 3.39 0.00473118 0.00412819 0.469999 0.410592 13 11676 41 2.187e+07 1.17e+07 -1 -1 11.77 1.44116 1.26298 39906 235943 -1 10782 13 5853 18929 1079825 212814 0 0 1079825 212814 11468 6525 0 0 23746 20136 0 0 39849 23813 0 0 28118 13244 0 0 488247 76528 0 0 488397 72568 0 0 11468 0 0 5835 106807 105576 203100 7872 7184 10.32 10.32 -2567.64 -10.32 0 0 -1 -1 0.63 0.48 0.20 -1 -1 0.63 0.150489 0.135384 +k4_n4_v7_l1_bidir.xml elliptic.blif common 250.40 vpr 102.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 996 131 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 105396 131 114 4855 4969 1 2150 1241 34 34 1156 clb auto 53.1 MiB 0.75 31289 99.7 MiB 4.31 0.05 24.5087 -13712.9 -24.5087 24.5087 5.03 0.0112512 0.00926088 0.918318 0.756965 24 33225 33 3.072e+07 2.988e+07 -1 -1 225.15 4.55672 3.79953 89088 639360 -1 29910 15 11471 49628 4991238 807844 0 0 4991238 807844 35206 14431 0 0 58199 50281 0 0 106893 58707 0 0 87885 30463 0 0 2358616 320211 0 0 2344439 333751 0 0 35206 0 0 33070 750845 802927 2001792 16086 32215 26.3301 26.3301 -15780.3 -26.3301 0 0 -1 -1 1.89 1.87 0.49 -1 -1 1.89 0.422747 0.368937 +k4_n4_v7_l1_bidir.xml ex1010.blif common 86.96 vpr 135.54 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1500 10 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 138788 10 10 4608 4618 0 3623 1520 
41 41 1681 clb auto 59.6 MiB 0.97 45506 134.1 MiB 7.15 0.07 35.0666 -326.901 -35.0666 nan 7.64 0.01112 0.0090887 1.0511 0.859689 22 49059 47 4.563e+07 4.5e+07 -1 -1 48.96 4.41129 3.6562 118482 826103 -1 44567 14 23597 90262 5488495 951218 0 0 5488495 951218 51571 30862 0 0 102361 90324 0 0 194271 103531 0 0 150754 65172 0 0 2491692 330107 0 0 2497846 331222 0 0 51571 0 0 31811 582564 595064 662964 42219 79285 37.8509 nan -350.371 -37.8509 0 0 -1 -1 2.57 2.40 0.64 -1 -1 2.57 0.464393 0.404428 +k4_n4_v7_l1_bidir.xml ex5p.blif common 54.09 vpr 65.86 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 346 8 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 67440 8 63 1072 1135 0 907 417 21 21 441 clb auto 28.0 MiB 0.20 11821 65.9 MiB 0.85 0.01 15.1106 -656.442 -15.1106 nan 1.49 0.00325923 0.00254923 0.214238 0.18683 24 14318 41 1.083e+07 1.038e+07 -1 -1 46.82 0.97289 0.84189 32642 233591 -1 12054 17 6940 23303 2343367 380584 0 0 2343367 380584 21162 10849 0 0 26541 23589 0 0 52114 26762 0 0 58957 21151 0 0 1106107 146693 0 0 1078486 151540 0 0 21162 0 0 18893 282102 291344 911959 2272 196 16.1916 nan -727.382 -16.1916 0 0 -1 -1 0.59 0.75 0.16 -1 -1 0.59 0.126837 0.113701 +k4_n4_v7_l1_bidir.xml frisc.blif common 168.03 vpr 107.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1046 20 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 110040 20 116 4445 4561 1 2328 1182 35 35 1225 clb auto 52.2 MiB 0.82 37497 102.7 MiB 4.68 0.05 24.4278 -14825.6 -24.4278 24.4278 5.26 0.0100112 0.00829789 1.02722 0.863059 28 40186 28 3.267e+07 3.138e+07 -1 -1 140.77 4.60032 3.88152 103554 761463 -1 37393 16 14334 63647 6450908 1039336 0 0 6450908 1039336 51220 20703 0 0 72963 64296 0 0 143419 73506 0 0 122959 
41808 0 0 3027944 414000 0 0 3032403 425023 0 0 51220 0 0 43985 879494 941241 2480121 13305 21431 25.9971 25.9971 -16524.4 -25.9971 0 0 -1 -1 2.25 2.32 0.53 -1 -1 2.25 0.461908 0.406094 +k4_n4_v7_l1_bidir.xml misex3.blif common 37.83 vpr 68.27 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 432 14 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 69912 14 14 1411 1425 0 1075 460 23 23 529 clb auto 30.5 MiB 0.29 13559 68.3 MiB 0.97 0.01 15.1312 -191.55 -15.1312 nan 1.72 0.00319552 0.00273496 0.226681 0.19278 24 14779 29 1.323e+07 1.296e+07 -1 -1 29.40 1.26787 1.07574 39522 283015 -1 13598 15 7073 26155 2058099 312011 0 0 2058099 312011 20194 9778 0 0 29527 26207 0 0 58435 29778 0 0 51685 19023 0 0 939017 112821 0 0 959241 114404 0 0 20194 0 0 15085 281799 316999 807207 6556 329 16.5576 nan -206.846 -16.5576 0 0 -1 -1 0.74 0.70 0.19 -1 -1 0.74 0.13185 0.117274 +k4_n4_v7_l1_bidir.xml pdc.blif common 704.96 vpr 151.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1529 16 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 155372 16 40 4591 4631 0 3652 1585 42 42 1764 clb auto 60.9 MiB 1.17 69783 139.0 MiB 8.28 0.07 33.871 -1073.72 -33.871 nan 8.09 0.0124289 0.00994374 1.29349 1.05837 36 80196 38 4.8e+07 4.587e+07 -1 -1 660.80 4.99864 4.09467 183520 1412616 -1 73362 22 26841 111690 29971058 6847814 0 0 29971058 6847814 76466 39532 0 0 126110 111903 0 0 258237 127170 0 0 209270 90615 0 0 14664297 3262554 0 0 14636678 3216040 0 0 76466 0 0 61859 1956575 1958439 4214975 39086 47642 37.7686 nan -1222.75 -37.7686 0 0 -1 -1 2.95 5.90 0.62 -1 -1 2.95 0.362121 0.308989 +k4_n4_v7_l1_bidir.xml s298.blif common 31.42 vpr 72.70 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 569 4 -1 -1 success a1966c4-dirty 
release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 74448 4 6 1942 1948 1 1189 579 26 26 676 clb auto 34.9 MiB 0.30 13873 72.7 MiB 1.29 0.02 24.2645 -193.053 -24.2645 24.2645 2.49 0.00475038 0.00401284 0.33782 0.288048 17 15671 45 1.728e+07 1.707e+07 -1 -1 19.97 1.41569 1.20012 39072 255848 -1 14304 18 8473 41165 3833296 587270 0 0 3833296 587270 20044 11481 0 0 46389 41319 0 0 87504 46597 0 0 61683 25771 0 0 1820349 232879 0 0 1797327 229223 0 0 20044 0 0 18131 676939 645318 1754970 22976 56266 26.2082 26.2082 -219.739 -26.2082 0 0 -1 -1 0.68 1.27 0.18 -1 -1 0.68 0.225644 0.200505 +k4_n4_v7_l1_bidir.xml s38417.blif common 84.62 vpr 157.22 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1735 29 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 160996 29 106 7534 7640 1 4766 1870 44 44 1936 clb auto 75.3 MiB 1.24 45823 156.3 MiB 10.73 0.11 22.8839 -12843.2 -22.8839 22.8839 9.17 0.0179289 0.0144971 1.89489 1.5397 17 42804 48 5.292e+07 5.205e+07 -1 -1 36.44 7.21674 5.97127 115248 760028 -1 41227 28 29324 99099 9100578 1646509 0 0 9100578 1646509 74226 40556 0 0 113142 99736 0 0 206293 114295 0 0 203460 83895 0 0 4267481 655519 0 0 4235976 652508 0 0 74226 0 0 48435 1064315 1073325 3277497 26120 98729 25.3218 25.3218 -15829.1 -25.3218 0 0 -1 -1 2.33 4.05 0.55 -1 -1 2.33 1.22365 1.05247 +k4_n4_v7_l1_bidir.xml s38584.1.blif common 67.25 vpr 152.33 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1647 38 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 155984 38 304 7475 7779 1 4418 1989 43 43 1849 clb auto 74.5 MiB 1.27 43857 151.2 MiB 10.33 0.13 16.7322 -11603.5 
-16.7322 16.7322 8.04 0.0160929 0.0129275 1.75651 1.42667 19 43230 46 5.043e+07 4.941e+07 -1 -1 25.65 5.82502 4.83199 116850 784767 -1 37598 11 19651 60501 3469053 596793 0 0 3469053 596793 51486 22909 0 0 71387 61426 0 0 123251 72199 0 0 123192 45868 0 0 1562459 187884 0 0 1537278 206507 0 0 51486 0 0 33652 387772 428572 1211453 9396 26419 17.258 17.258 -12827.2 -17.258 0 0 -1 -1 2.26 1.68 0.57 -1 -1 2.26 0.597634 0.51978 +k4_n4_v7_l1_bidir.xml seq.blif common 112.10 vpr 71.54 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 539 41 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 73252 41 35 1791 1826 0 1383 615 26 26 676 clb auto 33.7 MiB 0.36 18265 71.5 MiB 1.58 0.02 17.4129 -521.247 -17.4129 nan 2.47 0.00412259 0.00347615 0.352468 0.298175 24 22041 50 1.728e+07 1.617e+07 -1 -1 100.24 1.71907 1.44888 51072 366016 -1 18467 15 8949 32128 2827304 423587 0 0 2827304 423587 25144 11833 0 0 36756 32284 0 0 70197 37093 0 0 62412 24082 0 0 1320326 158987 0 0 1312469 159308 0 0 25144 0 0 18730 405555 439805 1113933 7435 1255 19.3002 nan -576.518 -19.3002 0 0 -1 -1 0.96 0.95 0.24 -1 -1 0.96 0.170585 0.148909 +k4_n4_v7_l1_bidir.xml spla.blif common 312.76 vpr 121.44 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1232 16 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 124356 16 46 3706 3752 0 2880 1294 38 38 1444 clb auto 51.9 MiB 0.84 47819 116.7 MiB 5.77 0.05 25.6975 -850.101 -25.6975 nan 6.49 0.0100554 0.00830429 0.971394 0.798786 32 52300 31 3.888e+07 3.696e+07 -1 -1 277.17 4.51179 3.74679 138672 1051752 -1 50528 21 21568 93360 15166036 2720735 0 0 15166036 2720735 64972 32585 0 0 104989 93585 0 0 214610 105807 0 0 175643 70717 0 0 7350580 1212848 0 0 7255242 1205193 0 0 64972 0 0 55204 
1577363 1563473 3523260 32982 18780 29.3385 nan -989.871 -29.3385 0 0 -1 -1 3.07 4.66 0.73 -1 -1 3.07 0.495574 0.426224 +k4_n4_v7_l1_bidir.xml tseng.blif common 15.30 vpr 66.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 279 52 -1 -1 success a1966c4-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-16T20:55:46 gh-actions-runner-vtr-auto-spawned4 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 68264 52 122 1483 1605 1 736 453 19 19 361 clb auto 28.9 MiB 0.19 5903 66.7 MiB 0.77 0.01 9.31933 -2372.61 -9.31933 9.31933 1.15 0.00355213 0.00309294 0.236842 0.205728 14 6636 41 8.67e+06 8.37e+06 -1 -1 9.70 1.02295 0.884484 17850 109085 -1 5721 18 4691 16161 949670 190869 0 0 949670 190869 12380 7185 0 0 19102 16641 0 0 32981 19199 0 0 35910 16089 0 0 425733 65931 0 0 423564 65824 0 0 12380 0 0 9131 70540 68387 175028 4158 4411 10.8554 10.8554 -2988.61 -10.8554 0 0 -1 -1 0.26 0.43 0.07 -1 -1 0.26 0.149867 0.133188 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt index d6944eeddb2..bb91c583e96 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt @@ -1,6 +1,6 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks 
device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops 
crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml bgm.v common 1994.99 vpr 898.68 MiB -1 -1 61.01 621344 14 118.10 -1 -1 123276 -1 -1 2287 257 0 11 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 920252 257 32 35747 33389 1 18576 2587 58 58 3364 clb auto 366.7 MiB 66.13 238606 672.7 MiB 114.11 0.89 17.1228 -22971.9 -17.1228 17.1228 66.43 0.122725 0.105945 15.0557 11.8391 78 394768 147 2.00088e+08 1.27615e+08 1.92320e+07 5717.01 1361.68 122.113 97.7861 1114397 11021065 660432 349453 53 168974 802689 141308298 36713877 45717834 6972091 95590464 29741786 0 0 792797 514828 747302 747302 910805 802689 2332383 1256218 40556637 4114854 1173010 341749 3457595 1539720 44978029 13690833 0 0 46359740 13705684 0 0 792797 0 1048626 1953882 3472000 3402169 10847209 13973 508 19.5428 19.5428 -25846 -19.5428 0 0 2.52407e+07 7305.90 14.24 130.51 4.88 34.68 0.19 14.24 14.3145 11.5453 -k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml LU8PEEng.v common 3071.55 vpr 833.25 MiB -1 -1 71.48 455940 98 132.11 -1 -1 115232 -1 -1 1800 114 45 8 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 
on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 853252 114 102 35713 31804 1 16705 2069 51 51 2601 clb auto 339.9 MiB 62.95 216212 561.5 MiB 92.18 0.71 65.1279 -53179 -65.1279 65.1279 49.18 0.120059 0.101817 14.974 11.8502 96 359574 127 1.52527e+08 1.2484e+08 1.77902e+07 6839.76 2509.64 176.104 141.33 1051316 9491173 587229 299163 55 145183 626548 203387007 66494555 41102351 7195584 162284656 59298971 0 0 557595 348397 529205 529205 686930 626548 3869612 1660071 37419060 5169165 813788 245793 2438766 1051474 77969311 28517504 0 0 79102740 28346398 0 0 557595 0 1827926 1276985 1909761 1891717 6237045 74462 22447 75.1357 75.1357 -67113 -75.1357 -0.0967573 -0.0199062 2.21294e+07 8508.02 8.37 98.67 2.89 16.30 0.13 8.37 6.86691 5.51408 -k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision0.v common 491.17 vpr 361.63 MiB -1 -1 13.25 101972 5 13.80 -1 -1 69408 -1 -1 673 169 0 0 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 370312 169 197 23321 21461 1 6785 1039 33 33 1089 clb auto 180.8 MiB 12.19 42112 221.0 MiB 12.30 0.12 3.10868 -13056.7 -3.10868 3.10868 5.95 0.0421986 0.0328892 4.76161 3.85299 58 63990 244 6.0475e+07 3.62708e+07 4.62388e+06 4245.99 385.70 79.3922 66.3623 452845 3145025 280765 57244 49 46719 122488 12901034 3413607 6417997 1071484 6483037 2342123 0 0 99450 91334 71450 71450 127765 122488 173471 117401 5739165 636855 123351 34238 451617 220807 2978920 1062567 0 0 3135845 1056467 0 0 99450 0 417639 158865 157759 165849 512493 25863 4191 3.58485 3.58485 -15007.4 -3.58485 0 0 5.85783e+06 5379.09 3.06 25.65 1.05 12.08 0.19 3.06 4.27202 3.58561 -k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision1.v common 1731.31 vpr 388.00 MiB -1 -1 10.47 123456 3 17.68 -1 -1 77352 -1 -1 655 115 0 40 success 8528925-dirty 
release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 397312 115 145 22868 19305 1 9712 955 40 40 1600 mult_36 auto 177.4 MiB 9.52 82272 238.0 MiB 13.23 0.12 5.15059 -21406.4 -5.15059 5.15059 8.56 0.0361559 0.0312351 4.65663 3.86719 76 154775 184 9.16046e+07 5.11412e+07 8.72311e+06 5451.94 1612.65 90.8987 76.51 519336 3912846 269202 122007 30 61007 155688 38640235 6222360 6668491 918705 31971744 5303655 0 0 115015 107081 104788 104788 162701 155688 2093369 113670 5939763 436808 143497 55137 451012 219128 14854225 2513893 0 0 14775865 2516167 0 0 115015 0 643171 157906 453642 427511 882511 43939 8570 5.48939 5.48939 -24883.8 -5.48939 0 0 1.18598e+07 6887.37 5.55 30.87 1.96 11.01 0.18 5.55 3.14734 2.6417 -k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision2.v common 5871.63 vpr 1.05 GiB -1 -1 14.96 197124 3 8.63 -1 -1 155544 -1 -1 1490 149 0 179 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 1100488 149 182 55416 37075 1 28670 2000 80 80 6400 mult_36 auto 360.6 MiB 29.07 291939 1074.7 MiB 79.70 0.55 12.5458 -48952.6 -12.5458 12.5458 135.77 0.108801 0.0950539 15.7001 12.8522 78 437698 224 3.90281e+08 1.51186e+08 3.79986e+07 5881.04 5479.15 81.5782 67.7414 1647473 12742193 618027 380228 28 138212 241867 64603805 10299476 9587932 1487408 55015873 8812068 0 0 208298 193889 166401 166401 247323 241867 3307020 170969 8438805 627104 233900 115896 693506 424548 25609640 4121299 0 0 25698912 4237503 0 0 208298 0 605901 210574 379701 368344 1028019 35661 9945 13.929 13.929 -56870.7 -13.929 0 0 4.76105e+07 7382.88 19.01 37.94 5.93 15.28 0.13 19.01 3.18631 2.64796 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml bgm.v common 1949.57 vpr 990.71 MiB -1 -1 50.15 621072 14 99.69 -1 -1 122864 -1 -1 2287 257 
0 11 success 55c2e27-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-14T19:59:46 gh-actions-runner-vtr-auto-spawned44 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 1014488 257 32 35747 33389 1 18576 2587 58 58 3364 clb auto 367.8 MiB 49.23 238606 675.6 MiB 99.64 0.75 17.1228 -22971.9 -17.1228 17.1228 57.11 0.119025 0.107831 14.3708 12.0287 80 386942 147 2.00088e+08 1.27615e+08 2.55641e+07 7599.31 1408.13 208.49 171.921 1117761 11099171 660432 342443 53 167505 790267 138861012 37280309 45235241 6956044 93625771 30324265 0 0 780707 505938 732377 732377 896045 790267 2229510 1230476 40161721 4162132 1153740 331993 3396768 1497707 44121031 13984181 0 0 45389113 14045238 0 0 780707 0 1040996 1924168 3330711 3269807 10529459 13619 644 19.5517 19.5517 -25791.2 -19.5517 0 0 3.23730e+07 9623.36 12.91 116.72 4.28 26.08 0.17 12.91 13.5489 11.1323 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml LU8PEEng.v common 2632.75 vpr 902.39 MiB -1 -1 63.75 456032 98 112.40 -1 -1 115200 -1 -1 1800 114 45 8 success 55c2e27-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-14T19:59:46 gh-actions-runner-vtr-auto-spawned44 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 924048 114 102 35713 31804 1 16705 2069 51 51 2601 clb auto 341.1 MiB 46.35 216212 564.8 MiB 77.84 0.61 65.1279 -53179 -65.1279 65.1279 41.35 0.0919253 0.0798848 13.1066 11.0242 92 366424 199 1.52527e+08 1.2484e+08 2.23327e+07 8586.18 2159.64 106.951 86.8929 1046116 9369805 587229 297825 51 119193 529288 187167793 60416452 36019602 6388156 151148191 54028296 0 0 462068 262943 455608 455608 590472 529288 3425542 1460528 32931709 4746921 669785 192271 2035353 849004 72703611 26059047 0 0 73893645 25860842 0 0 462068 0 1676090 1068906 1746272 1715401 5449650 72797 21999 75.3335 75.3335 -66828.2 -75.3335 -0.584019 -0.0210131 2.84548e+07 10940.0 7.36 83.62 2.26 11.21 0.11 7.36 6.06269 4.82642 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision0.v 
common 283.73 vpr 392.11 MiB -1 -1 10.78 102076 5 11.41 -1 -1 69052 -1 -1 673 169 0 0 success 55c2e27-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-14T19:59:46 gh-actions-runner-vtr-auto-spawned44 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 401520 169 197 23321 21461 1 6785 1039 33 33 1089 clb auto 181.9 MiB 9.01 42112 242.0 MiB 10.27 0.10 3.10868 -13056.7 -3.10868 3.10868 4.94 0.0349474 0.0283969 3.80755 3.12721 56 67561 122 6.0475e+07 3.62708e+07 5.74716e+06 5277.47 200.29 41.3015 33.8499 450669 3098400 280765 58818 48 48056 123886 12722619 3198612 6403239 1066137 6319380 2132475 0 0 101707 94104 72144 72144 129292 123886 172012 107677 5708727 619500 127073 37688 463513 228647 2942661 962537 0 0 3005490 952429 0 0 101707 0 410238 163410 171121 169876 535147 24900 3262 3.58153 3.58153 -15246.6 -3.58153 0 0 7.31643e+06 6718.49 2.54 17.98 0.83 7.92 0.15 2.54 2.61894 2.14807 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision1.v common 1270.39 vpr 408.98 MiB -1 -1 9.93 123264 3 17.37 -1 -1 76916 -1 -1 655 115 0 40 success 55c2e27-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-14T19:59:46 gh-actions-runner-vtr-auto-spawned44 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 418792 115 145 22868 19305 1 9712 955 40 40 1600 mult_36 auto 178.3 MiB 8.01 82272 238.9 MiB 13.62 0.12 5.15059 -21406.4 -5.15059 5.15059 7.93 0.038643 0.0348079 4.98425 4.28022 76 156046 152 9.16046e+07 5.11412e+07 1.13224e+07 7076.52 1165.69 44.1282 36.8787 519336 3912846 269202 121805 23 60770 153906 36841266 5753488 6647072 905000 30194194 4848488 0 0 112979 104980 103434 103434 161203 153906 2026836 112210 5928687 430438 139538 54153 444203 215676 14009705 2291713 0 0 13914681 2286978 0 0 112979 0 639760 152004 429721 407753 821145 44544 8043 5.50488 5.50488 -25672.5 -5.50488 0 0 1.42154e+07 8884.65 4.94 22.27 1.75 7.91 0.15 4.94 1.46066 1.2121 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml 
stereovision2.v common 4904.93 vpr 1.08 GiB -1 -1 12.12 197324 3 7.26 -1 -1 155148 -1 -1 1490 149 0 179 success 55c2e27-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-08-14T19:59:46 gh-actions-runner-vtr-auto-spawned44 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 1137480 149 182 55416 37075 1 28670 2000 80 80 6400 mult_36 auto 361.5 MiB 22.38 291906 1080.0 MiB 72.56 0.56 12.5458 -48886.4 -12.5458 12.5458 119.83 0.106692 0.0933508 13.4628 11.5031 78 447800 230 3.90281e+08 1.51186e+08 4.81089e+07 7517.02 4519.15 84.1092 69.7736 1647473 12742193 618027 380030 24 136537 235998 65355519 10717370 9436646 1471004 55918873 9246366 0 0 204209 190100 162509 162509 241165 235998 3278790 169946 8312600 626282 228010 114098 678672 418624 26038560 4347361 0 0 26211004 4452452 0 0 204209 0 583219 203364 361375 355684 998388 33892 9670 13.9298 13.9298 -57390.1 -13.9298 0 0 6.11255e+07 9550.86 25.86 59.65 7.84 25.31 0.15 25.86 5.36109 4.49844 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_noc_mlp_odin_ii/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_noc_mlp_odin_ii/config/config.txt new file mode 100644 index 00000000000..975446fce53 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_noc_mlp_odin_ii/config/config.txt @@ -0,0 +1,43 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Large_Designs/MLP/MLP_1/verilog + +# Path to directory of architectures to use +archs_dir=arch/noc/mesh_noc_topology + +# Path to directory containing the verilog includes file(s) +includes_dir=benchmarks/noc/Large_Designs/MLP/shared_verilog + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Large_Designs/MLP/MLP_1 + +# Add circuits to list to sweep 
+circuit_list_add=mlp1_complete_engine.v + +# Add architectures to list to sweep +arch_list_add=coffe_7nm_NoC_mesh_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=mlp_1.flows + +# Add include files to the list. +# Some benchmarks instantiate hard dsp and memory blocks +# This functionality is guarded under the `dsp_top` and other macros. +# The hard_block_include.v file +# defines this macros, thereby enabling instantiations of the hard blocks +include_list_add=hard_block_include.v + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params =-starting_stage odin --pack --place --allow_unrelated_clustering on --pack_high_fanout_threshold memory:100000 \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/config.txt index 7bb68bed48e..1ccd16490d7 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/config.txt @@ -35,4 +35,6 @@ qor_parse_file=qor_standard.txt pass_requirements_file=pass_requirements.txt #Script parameters -script_params=-track_memory_usage +script_params_common=-track_memory_usage +script_params_list_add = +script_params_list_add = --router_algorithm parallel diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/golden_results.txt index 749f23089f4..8c6c0b532f1 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/koios/config/golden_results.txt @@ -1,2 +1,3 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error 
odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml test.v common 6.55 vpr 74.02 MiB -1 -1 0.18 21664 1 0.04 -1 -1 35456 -1 -1 12 130 0 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 75792 130 40 596 562 1 356 185 14 14 196 dsp_top auto 35.5 MiB 0.12 1674 
74.0 MiB 0.12 0.00 5.12303 -567.54 -5.12303 5.12303 0.49 0.000709907 0.000632382 0.0521422 0.0466692 82 3380 8 4.93594e+06 1.0962e+06 1.24853e+06 6370.04 3.85 0.328078 0.303484 3282 8 751 823 207761 68347 4.57723 4.57723 -668.524 -4.57723 0 0 1.53695e+06 7841.58 0.29 0.05 0.025136 0.0240614 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops 
crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml test.v common 6.19 vpr 74.24 MiB -1 -1 0.16 18044 1 0.09 -1 -1 32460 -1 -1 12 130 0 -1 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 76020 130 40 596 562 1 356 185 14 14 196 dsp_top auto 35.4 MiB 0.10 1734 74.2 MiB 
0.13 0.00 5.12303 -543.21 -5.12303 5.12303 0.45 0.000787704 0.000733664 0.0618895 0.0577764 82 3564 25 4.93594e+06 1.0962e+06 1.24853e+06 6370.04 3.47 0.374217 0.341793 33448 252102 -1 3384 9 709 754 192289 63961 0 0 192289 63961 754 713 0 0 18466 17836 0 0 19719 19117 0 0 755 715 0 0 69265 12759 0 0 83330 12821 0 0 754 0 0 45 152 212 1613 0 0 4.57723 4.57723 -644.847 -4.57723 0 0 1.53695e+06 7841.58 0.26 0.05 0.23 -1 -1 0.26 0.0256266 0.0242 +k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml test.v common_--router_algorithm_parallel 4.88 vpr 74.18 MiB -1 -1 0.16 18028 1 0.09 -1 -1 32508 -1 -1 12 130 0 -1 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 75956 130 40 596 562 1 356 185 14 14 196 dsp_top auto 35.4 MiB 0.10 1734 74.2 MiB 0.13 0.00 5.12303 -543.21 -5.12303 5.12303 0.45 0.000821246 0.000766795 0.0629599 0.0588552 82 3617 17 4.93594e+06 1.0962e+06 1.24853e+06 6370.04 2.14 0.308433 0.2822 33448 252102 -1 3414 14 731 776 220618 73159 0 0 220618 73159 776 735 0 0 20611 19950 0 0 21899 21238 0 0 777 738 0 0 80287 15212 0 0 96268 15286 0 0 776 0 0 45 151 219 1641 0 0 4.57723 4.57723 -637.466 -4.57723 0 0 1.53695e+06 7841.58 0.26 0.06 0.23 -1 -1 0.26 0.0330502 0.0309507 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt index caed2da9784..4e4071f98c2 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt @@ -24,5 +24,6 @@ qor_parse_file=qor_standard.txt # Pass requirements pass_requirements_file=pass_requirements.txt -script_params=-track_memory_usage --route_chan_width 100 --max_router_iterations 100 --router_lookahead map --flat_routing 
true - +script_params_common=-track_memory_usage --route_chan_width 100 --max_router_iterations 100 --router_lookahead map --flat_routing true +script_params_list_add = +script_params_list_add = --router_algorithm parallel --num_workers 4 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt index 6a885701bc1..67f180f6b08 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt @@ -1,2 +1,3 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes 
crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - 
k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 12.82 vpr 76.19 MiB -1 -1 3.42 34124 16 0.76 -1 -1 37916 -1 -1 61 45 3 1 success 8528925 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:34:55 gh-actions-runner-vtr-auto-spawned83 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 78016 45 32 1188 1147 1 781 142 14 14 196 memory auto 39.1 MiB 3.14 6687 76.2 MiB 0.85 0.01 9.87688 -6144.34 -9.87688 9.87688 0.04 0.00303074 0.00250348 0.260087 0.214733 -1 10707 13 9.20055e+06 5.32753e+06 1.21359e+06 5900 2.66 0.354898 0.295042 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes 
crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time 
crit_path_total_sta_time +k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 6.77 vpr 75.04 MiB -1 -1 1.37 31788 16 1.37 -1 -1 35456 -1 -1 61 45 3 1 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 76840 45 32 1188 1147 1 781 142 14 14 196 memory auto 38.0 MiB 1.38 6687 75.0 MiB 0.32 0.00 9.87688 -6144.34 -9.87688 9.87688 0.02 0.00160353 0.00140955 0.146887 0.130179 -1 10701 12 9.20055e+06 5.32753e+06 1.47691e+06 7535.23 1.15 0.187274 0.164699 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common_--router_algorithm_parallel_--num_workers_4 6.97 vpr 74.97 MiB -1 -1 1.39 31556 16 1.40 -1 -1 35520 -1 -1 61 45 3 1 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 76772 45 32 1188 1147 1 781 142 14 14 196 memory auto 37.9 MiB 1.39 6687 75.0 MiB 0.40 0.00 9.87688 -6144.34 -9.87688 9.87688 0.02 0.00245521 0.00214283 0.217122 0.186066 -1 10603 14 9.20055e+06 5.32753e+06 1.47691e+06 7535.23 1.21 0.280862 0.238432 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt index 0c49b4e3405..dbceb44a4dc 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt @@ -24,5 +24,6 @@ qor_parse_file=qor_multiclock.txt # Pass requirements pass_requirements_file=pass_requirements_multiclock.txt -script_params=-starting_stage vpr -sdc_file tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/multiclock.sdc - +script_params_common=-starting_stage vpr -sdc_file tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/multiclock.sdc +script_params_list_add = +script_params_list_add = --router_algorithm parallel --num_workers 4 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt index a939a6842c7..ff260f23dac 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt @@ -1,2 +1,3 @@ arch circuit script_params crit_path_delay_mcw clk_to_clk_cpd clk_to_clk2_cpd clk_to_input_cpd clk_to_output_cpd clk2_to_clk2_cpd clk2_to_clk_cpd clk2_to_input_cpd clk2_to_output_cpd input_to_input_cpd input_to_clk_cpd input_to_clk2_cpd input_to_output_cpd output_to_output_cpd output_to_clk_cpd output_to_clk2_cpd output_to_input_cpd clk_to_clk_setup_slack clk_to_clk2_setup_slack clk_to_input_setup_slack clk_to_output_setup_slack clk2_to_clk2_setup_slack clk2_to_clk_setup_slack clk2_to_input_setup_slack clk2_to_output_setup_slack input_to_input_setup_slack input_to_clk_setup_slack input_to_clk2_setup_slack input_to_output_setup_slack output_to_output_setup_slack output_to_clk_setup_slack output_to_clk2_setup_slack output_to_input_setup_slack clk_to_clk_hold_slack clk_to_clk2_hold_slack clk_to_input_hold_slack clk_to_output_hold_slack clk2_to_clk2_hold_slack clk2_to_clk_hold_slack clk2_to_input_hold_slack clk2_to_output_hold_slack input_to_input_hold_slack 
input_to_clk_hold_slack input_to_clk2_hold_slack input_to_output_hold_slack output_to_output_hold_slack output_to_clk_hold_slack output_to_clk2_hold_slack output_to_input_hold_slack k6_frac_N10_mem32K_40nm.xml multiclock.blif common 1.31564 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.31564 -1 1.07053 -1 1.76203 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.16427 -1 3.30853 -1 -1.48434 -1 -1 -1 -1 +k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--router_algorithm_parallel_--num_workers_4 1.31564 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.31564 -1 1.07053 -1 1.76203 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.16427 -1 3.30853 -1 -1.48434 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/config.txt index 401c36ecd01..dac263af64c 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/config.txt @@ -24,5 +24,6 @@ qor_parse_file=qor_standard.txt pass_requirements_file=pass_requirements.txt # Script parameters -#script_params="" -script_params = -track_memory_usage +script_params_common = -track_memory_usage +script_params_list_add = +script_params_list_add = --router_algorithm parallel --num_workers 4 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/golden_results.txt index 0ec96460c6e..a5312f38fc9 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing/config/golden_results.txt @@ -1,2 +1,3 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem 
parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - k6_frac_N10_mem32K_40nm.xml ch_intrinsics.v common 2.81 vpr 64.00 MiB -1 -1 0.21 21792 3 0.07 -1 -1 36304 -1 -1 68 99 1 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 65540 99 130 343 473 1 225 298 12 12 144 clb auto 25.9 MiB 0.13 574 64.0 MiB 0.16 0.00 1.63028 -109.727 -1.63028 
1.63028 0.24 0.000401182 0.000358398 0.0349089 0.0312228 40 1376 20 5.66058e+06 4.21279e+06 333335. 2314.82 1.17 0.219159 0.200557 1211 9 370 555 25048 7436 1.97803 1.97803 -136.611 -1.97803 -1.34293 -0.298787 419432. 2912.72 0.10 0.02 0.0138875 0.0131731 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes 
crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k6_frac_N10_mem32K_40nm.xml ch_intrinsics.v common 2.72 vpr 63.91 MiB -1 -1 0.18 18244 3 0.15 -1 -1 33476 -1 -1 68 99 1 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 65444 99 130 343 473 1 225 298 12 12 144 clb auto 25.5 MiB 0.10 599 63.9 MiB 0.15 0.00 1.62851 -108.153 -1.62851 1.62851 0.22 0.0005716 
0.000537284 0.0435 0.0407889 36 1445 27 5.66058e+06 4.21279e+06 305235. 2119.69 0.98 0.22339 0.203215 12238 58442 -1 1263 12 429 686 37045 11418 0 0 37045 11418 686 536 0 0 1992 1802 0 0 2359 1992 0 0 742 603 0 0 15126 3546 0 0 16140 2939 0 0 686 0 0 257 388 336 2661 0 0 1.99752 1.99752 -139.829 -1.99752 -0.305022 -0.0771249 378970. 2631.74 0.08 0.03 0.04 -1 -1 0.08 0.0189281 0.0175612 +k6_frac_N10_mem32K_40nm.xml ch_intrinsics.v common_--router_algorithm_parallel_--num_workers_4 2.26 vpr 64.16 MiB -1 -1 0.19 18428 3 0.15 -1 -1 33484 -1 -1 68 99 1 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 65700 99 130 343 473 1 225 298 12 12 144 clb auto 25.6 MiB 0.10 599 64.2 MiB 0.15 0.00 1.62851 -108.153 -1.62851 1.62851 0.22 0.000550016 0.000510375 0.0416236 0.0381932 36 1454 19 5.66058e+06 4.21279e+06 305235. 2119.69 0.52 0.126906 0.113733 12238 58442 -1 1272 12 419 668 36954 11455 0 0 36954 11455 668 526 0 0 2004 1826 0 0 2365 2004 0 0 724 593 0 0 15013 3573 0 0 16180 2933 0 0 668 0 0 249 384 322 2589 0 0 1.99231 1.99231 -140.914 -1.99231 -0.305022 -0.0771249 378970. 
2631.74 0.08 0.03 0.04 -1 -1 0.08 0.0174112 0.0155977 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/config.txt index 3c7366d98fe..17b20f60f24 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/config.txt @@ -29,3 +29,5 @@ script_params_list_add = --timing_update_type auto script_params_list_add = --timing_update_type full script_params_list_add = --timing_update_type incremental script_params_list_add = --timing_update_type incremental --quench_recompute_divider 999999999 #Do post-move incremental STA during quench +script_params_list_add = --timing_update_type incremental --router_algorithm parallel --num_workers 4 # rarely exercised code path +script_params_list_add = --timing_update_type full --router_algorithm parallel --num_workers 4 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/golden_results.txt index e3f3510b5ec..37fb4f22f53 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_timing_update_type/config/golden_results.txt @@ -1,5 +1,7 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets 
num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time - k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_auto 1.16 vpr 62.23 MiB -1 -1 0.49 25724 5 0.12 -1 -1 35796 -1 -1 12 10 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 63728 10 2 181 183 1 40 24 6 6 36 clb auto 23.6 MiB 0.03 152 62.2 MiB 0.01 0.00 2.0099 -85.4829 -2.0099 2.0099 0.00 0.000148273 0.000125883 0.00293958 0.00254045 -1 137 15 646728 646728 138825. 
3856.24 0.01 0.00984083 0.00873552 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_full 1.15 vpr 62.10 MiB -1 -1 0.42 25388 5 0.14 -1 -1 35504 -1 -1 12 10 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 63592 10 2 181 183 1 40 24 6 6 36 clb auto 23.5 MiB 0.04 152 62.1 MiB 0.01 0.00 2.0099 -85.4829 -2.0099 2.0099 0.00 0.000124802 9.9759e-05 0.00754593 0.00711966 -1 137 15 646728 646728 138825. 3856.24 0.01 0.0162485 0.0150465 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_incremental 1.13 vpr 62.04 MiB -1 -1 0.42 25392 5 0.13 -1 -1 35796 -1 -1 12 10 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 63528 10 2 181 183 1 40 24 6 6 36 clb auto 23.4 MiB 0.04 152 62.0 MiB 0.01 0.00 2.0099 -85.4829 -2.0099 2.0099 0.00 1.3455e-05 9.755e-06 0.00205916 0.00187256 -1 137 15 646728 646728 138825. 3856.24 0.01 0.00865869 0.00791492 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_incremental_--quench_recompute_divider_999999999 1.08 vpr 61.99 MiB -1 -1 0.42 25488 5 0.12 -1 -1 35964 -1 -1 12 10 0 0 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 63480 10 2 181 183 1 40 24 6 6 36 clb auto 23.5 MiB 0.03 152 62.0 MiB 0.01 0.00 2.0099 -85.4829 -2.0099 2.0099 0.00 9.8431e-05 2.9719e-05 0.00312475 0.00288136 -1 137 15 646728 646728 138825. 
3856.24 0.01 0.00941001 0.0086255 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes 
crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_auto 1.43 vpr 61.82 MiB -1 -1 0.34 22744 5 0.31 -1 -1 33812 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63308 10 2 181 183 1 40 24 6 6 36 clb auto 23.2 MiB 0.03 171 61.8 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 0.00021372 0.000192501 0.00180931 0.00171502 -1 163 21 646728 646728 138825. 
3856.24 0.02 0.0128896 0.0113993 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_full 1.39 vpr 61.79 MiB -1 -1 0.34 22548 5 0.32 -1 -1 33868 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63276 10 2 181 183 1 40 24 6 6 36 clb auto 23.2 MiB 0.03 171 61.8 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 0.000213827 0.000192479 0.00182632 0.00173178 -1 163 21 646728 646728 138825. 3856.24 0.02 0.0128195 0.0112985 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_incremental 1.37 vpr 61.70 MiB -1 -1 0.32 22740 5 0.30 -1 -1 33952 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63184 10 2 181 183 1 40 24 6 6 36 clb auto 23.2 MiB 0.03 171 61.7 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 6.0326e-05 4.8099e-05 0.00129504 0.00122282 -1 163 21 646728 646728 138825. 
3856.24 0.01 0.00725272 0.00528838 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_incremental_--quench_recompute_divider_999999999 1.44 vpr 61.68 MiB -1 -1 0.34 22584 5 0.34 -1 -1 33880 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63156 10 2 181 183 1 40 24 6 6 36 clb auto 23.2 MiB 0.03 171 61.7 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 0.000173798 8.9569e-05 0.00138827 0.00124749 -1 163 21 646728 646728 138825. 3856.24 0.02 0.0112762 0.00755481 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_incremental_--router_algorithm_parallel_--num_workers_4 1.40 vpr 61.78 MiB -1 -1 0.33 22868 5 0.31 -1 -1 33924 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63264 10 2 181 183 1 40 24 6 6 36 clb auto 23.2 MiB 0.03 171 61.8 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 8.3175e-05 5.0203e-05 0.00133949 0.00121351 -1 163 21 646728 646728 138825. 
3856.24 0.01 0.00784067 0.00532275 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_N10_mem32K_40nm.xml stereovision3.v common_--timing_update_type_full_--router_algorithm_parallel_--num_workers_4 1.42 vpr 61.66 MiB -1 -1 0.34 22744 5 0.30 -1 -1 33908 -1 -1 12 10 0 0 success v8.0.0-8293-gcafae33ff-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-08-02T01:36:29 redacted.eecg.utoronto.ca /home/redacted/par1/vtr-verilog-to-routing/vtr_flow/tasks 63140 10 2 181 183 1 40 24 6 6 36 clb auto 23.1 MiB 0.03 171 61.7 MiB 0.01 0.00 2.06897 -87.8888 -2.06897 2.06897 0.00 0.000453545 0.000422888 0.00262389 0.00239838 -1 163 21 646728 646728 138825. 3856.24 0.02 0.0186074 0.0163246 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1