From fac58aa217f7b4c4d5327b201f5e9fe8cbf9ee6a Mon Sep 17 00:00:00 2001 From: "Siang-Yun (Sonia) Lee" Date: Fri, 30 Jun 2023 15:36:19 +0200 Subject: [PATCH] Realistic AQFP technology constraints (#614) * missing includes * realistic assumptions & updated verification * modifications from unmerged PR #594 * update basic ASAP ALAP scheduling * update counting & dump * update chunk movement * compile & debug * debug chunk movement * clean up depth-optimal scheduling * visualization * move PIs and POs, debug * remove buffer chains; clean up * cleanup * fix merging mistakes * fix retiming * fix tests * delete deprecated verification method * fix experiments --- experiments/aqfp_flow_aspdac.cpp | 22 +- experiments/aqfp_flow_date.cpp | 5 +- experiments/buffer_insertion.cpp | 93 +- experiments/buffer_insertion_iwls.cpp | 101 -- experiments/buffer_insertion_iwls.json | 160 -- .../algorithms/aqfp/aqfp_assumptions.hpp | 86 +- .../algorithms/aqfp/aqfp_rebuild.hpp | 29 +- .../algorithms/aqfp/aqfp_retiming.hpp | 2 +- .../algorithms/aqfp/buffer_insertion.hpp | 1550 +++++++++-------- .../algorithms/aqfp/buffer_verification.hpp | 192 +- .../aqfp/optimal_buffer_insertion.hpp | 85 + include/mockturtle/io/write_dot.hpp | 22 + include/mockturtle/mockturtle.hpp | 2 + include/mockturtle/networks/buffered.hpp | 90 +- test/algorithms/aqfp/aqfp_retiming.cpp | 7 +- test/algorithms/aqfp/buffer_insertion.cpp | 328 +--- 16 files changed, 1326 insertions(+), 1448 deletions(-) delete mode 100644 experiments/buffer_insertion_iwls.cpp delete mode 100644 experiments/buffer_insertion_iwls.json diff --git a/experiments/aqfp_flow_aspdac.cpp b/experiments/aqfp_flow_aspdac.cpp index cd713694e..9385c219b 100644 --- a/experiments/aqfp_flow_aspdac.cpp +++ b/experiments/aqfp_flow_aspdac.cpp @@ -91,15 +91,18 @@ int main() /* convert MIG network to AQFP */ aqfp_network aqfp = cleanup_dangling( mig_opt ); + aqfp_assumptions_legacy aqfp_ps; + aqfp_ps.splitter_capacity = 4; + aqfp_ps.branch_pis = true; + 
aqfp_ps.balance_pis = true; + aqfp_ps.balance_pos = true; + /* Buffer insertion params */ buffer_insertion_params buf_ps; buf_ps.scheduling = buffer_insertion_params::better_depth; buf_ps.optimization_effort = buffer_insertion_params::none; buf_ps.max_chunk_size = 100; - buf_ps.assume.splitter_capacity = 4u; - buf_ps.assume.branch_pis = true; - buf_ps.assume.balance_pis = true; - buf_ps.assume.balance_pos = true; + buf_ps.assume = legacy_to_realistic( aqfp_ps ); /* buffer insertion */ stopwatch<>::duration time_insertion{ 0 }; @@ -110,12 +113,6 @@ int main() uint32_t jj_depth = buf_inst.depth(); total_runtime += to_seconds( time_insertion ); - aqfp_assumptions aqfp_ps; - aqfp_ps.splitter_capacity = buf_ps.assume.splitter_capacity; - aqfp_ps.branch_pis = buf_ps.assume.branch_pis; - aqfp_ps.balance_pis = buf_ps.assume.balance_pis; - aqfp_ps.balance_pos = buf_ps.assume.balance_pos; - /* retiming params */ aqfp_retiming_params aps; aps.aqfp_assumptions_ps = aqfp_ps; @@ -169,7 +166,10 @@ int main() /* cec */ auto cec = abc_cec( buffered_aqfp, benchmark ); - cec &= verify_aqfp_buffer( buffered_aqfp, aqfp_ps ); + std::vector pi_levels; + for ( auto i = 0u; i < buffered_aqfp.num_pis(); ++i ) + pi_levels.emplace_back( 0 ); + cec &= verify_aqfp_buffer( buffered_aqfp, aqfp_ps, pi_levels ); /* compute final JJ cost */ uint32_t num_jjs_ret = 0; diff --git a/experiments/aqfp_flow_date.cpp b/experiments/aqfp_flow_date.cpp index 1a8267ef1..a2f03c634 100644 --- a/experiments/aqfp_flow_date.cpp +++ b/experiments/aqfp_flow_date.cpp @@ -401,9 +401,8 @@ int main( int argc, char** argv ) buf_ps.optimization_effort = buffer_insertion_params::until_sat; buf_ps.max_chunk_size = std::numeric_limits::max(); buf_ps.assume.splitter_capacity = 4u; - buf_ps.assume.branch_pis = false; - buf_ps.assume.balance_pis = false; - buf_ps.assume.balance_pos = true; + buf_ps.assume.ci_capacity = std::numeric_limits::max(); + buf_ps.assume.balance_cios = true; buffer_insertion buf_inst( aqfp, buf_ps ); 
uint32_t num_bufs = buf_inst.dry_run(); uint32_t num_jjs = opt_stats.maj3_after_exact * 6 + opt_stats.maj5_after_exact * 10 + num_bufs * 2; diff --git a/experiments/buffer_insertion.cpp b/experiments/buffer_insertion.cpp index 86545ef40..c83bb24d2 100644 --- a/experiments/buffer_insertion.cpp +++ b/experiments/buffer_insertion.cpp @@ -23,18 +23,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ #include "experiments.hpp" -#include #include #include #include #include -#include -#include -#include -#include #include #include -#include +#include #include #include #include @@ -44,6 +39,8 @@ #include +using namespace mockturtle; + int main( int argc, char* argv[] ) { std::string run_only_one = ""; @@ -59,9 +56,9 @@ int main( int argc, char* argv[] ) /* NOTE 2: Please clone this repository: https://github.com/lsils/SCE-benchmarks * And put in the following string the relative path from your build path to SCE-benchmarks/ISCAS/strashed/ */ - std::string benchmark_path = "../../SCE-benchmarks/ISCAS/strashed/"; + // std::string benchmark_path = "../../SCE-benchmarks/ISCAS/strashed/"; // std::string benchmark_path = "../../SCE-benchmarks/MCNC/original/"; - // std::string benchmark_path = "../../SCE-benchmarks/EPFL/MIGs/"; + std::string benchmark_path = "../../SCE-benchmarks/EPFL/MIGs/"; static const std::string benchmarks_iscas[] = { "adder1", "adder8", "mult8", "counter16", "counter32", "counter64", "counter128", "c17", "c432", "c499", "c880", "c1355", "c1908", "c2670", "c3540", "c5315", "c6288", "c7552", @@ -71,33 +68,30 @@ int main( int argc, char* argv[] ) "m3", "max512", "misex3", "mlp4", "prom2", "sqr6", "x1dn" }; const auto benchmarks_epfl = experiments::epfl_benchmarks(); - experiment - exp( "buffer_insertion", "benchmark", "#gates", "depth", "max FO", "#buffers", "opt. 
#JJs", "depth_JJ", "runtime", "verified" ); + experiment + exp( "buffer_insertion", "benchmark", "#gates", "#buffers", "#buff real", "max phase skip", "depth_JJ", "runtime", "verified" ); buffer_insertion_params ps; - ps.scheduling = buffer_insertion_params::better; - ps.optimization_effort = buffer_insertion_params::until_sat; - ps.assume.splitter_capacity = 4u; - ps.assume.branch_pis = true; - ps.assume.balance_pis = true; - ps.assume.balance_pos = true; - - if ( argc == 3 ) // example syntax: ./buffer_insertion 4 111 - { - ps.assume.splitter_capacity = std::stoi( argv[1] ); - uint32_t arg = std::stoi( argv[2] ); - ps.assume.branch_pis = arg >= 100; - ps.assume.balance_pis = ( arg % 100 ) >= 10; - ps.assume.balance_pos = arg % 10; - } + ps.scheduling = buffer_insertion_params::better_depth; + ps.optimization_effort = buffer_insertion_params::none; + ps.max_chunk_size = 10000; + + // ASP-DAC etc. SoTA works + //ps.assume.num_phases = 1; + //ps.assume.ci_phases = {0u}; + //ps.assume.ci_capacity = 1; + //ps.assume.splitter_capacity = 4; + //ps.assume.balance_cios = true; + + // best possible relaxation + ps.assume.ci_capacity = 2; + ps.assume.ci_phases = { 3u, 4u, 5u }; uint32_t total_buffers{ 0 }, total_depth{ 0 }; - for ( auto benchmark : benchmarks_iscas ) + for ( auto benchmark : benchmarks_epfl ) { if ( run_only_one != "" && benchmark != run_only_one ) continue; - if ( benchmark == "hyp" && run_only_one != "hyp" ) - continue; std::cout << "\n[i] processing " << benchmark << "\n"; names_view ntk; @@ -114,21 +108,28 @@ int main( int argc, char* argv[] ) stopwatch<>::duration t{ 0 }; buffer_insertion aqfp( ntk, ps ); buffered_mig_network bufntk; + std::vector pi_levels( ntk.num_pis() ); uint32_t num_buffers = call_with_stopwatch( t, [&]() { - return aqfp.dry_run(); + return aqfp.run( bufntk, pi_levels ); } ); - aqfp.dump_buffered_network( bufntk ); - bool verified = verify_aqfp_buffer( bufntk, ps.assume ); + bool verified = verify_aqfp_buffer( bufntk, ps.assume, 
pi_levels ); + auto const levels = schedule_buffered_network_with_PI_levels( bufntk, pi_levels ); + + uint32_t max_chain = aqfp.remove_buffer_chains( bufntk ); // names_view named_bufntk{bufntk}; // restore_pio_names_by_order( ntk, named_bufntk ); // write_verilog( named_bufntk, benchmark_path + "../best_insertion/" + benchmark + "_buffered.v" ); - depth_view d{ ntk }; - depth_view d_buf{ bufntk }; +#if 0 + depth_view depth_buffered( bufntk ); + depth_buffered.foreach_node( [&]( auto n ){ depth_buffered.set_level( n, levels[n] ); } ); + write_dot( depth_buffered, benchmark + ".dot" ); + std::system( fmt::format( "dot -Tpng -o {0}.png {0}.dot; rm {0}.dot; open {0}.png", benchmark ).c_str() ); +#endif total_buffers += num_buffers; - total_depth += d_buf.depth(); + total_depth += aqfp.depth(); uint32_t max_fanout{ 0 }; ntk.foreach_node( [&]( auto const& n ) { @@ -136,7 +137,29 @@ int main( int argc, char* argv[] ) max_fanout = std::max( max_fanout, ntk.fanout_size( n ) ); } ); - exp( benchmark, ntk.num_gates(), d.depth(), max_fanout, num_buffers, ntk.num_gates() * 6 + num_buffers * 2, d_buf.depth(), to_seconds( t ), verified ); + uint32_t num_buffers_real{0}, max_phase_skip{0}; + + bufntk.foreach_node( [&]( auto n ){ + if ( bufntk.is_buf( n ) && !bufntk.is_dead( n ) ) + num_buffers_real++; + }); + max_phase_skip = max_chain; + for ( auto pil : pi_levels ) + { + if ( pil % 4 == 1 ) + max_phase_skip = std::max( max_phase_skip, pil - 5 ); + else if ( pil % 4 == 0 ) + max_phase_skip = std::max( max_phase_skip, pil - 4 ); + else if ( pil % 4 == 3 ) + max_phase_skip = std::max( max_phase_skip, pil - 3 ); + else + fmt::print( "strange pi level {}\n", pil ); + } + bufntk.foreach_po( [&]( auto f ){ + max_phase_skip = std::max( max_phase_skip, aqfp.depth() - levels[f] ); + }); + + exp( benchmark, ntk.num_gates(), num_buffers, num_buffers_real, max_phase_skip, aqfp.depth(), to_seconds( t ), verified ); } exp.save(); diff --git a/experiments/buffer_insertion_iwls.cpp 
b/experiments/buffer_insertion_iwls.cpp deleted file mode 100644 index 56bc21117..000000000 --- a/experiments/buffer_insertion_iwls.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* mockturtle: C++ logic network library - * Copyright (C) 2018-2022 EPFL - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "experiments.hpp" -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Note: Please download this repository: https://github.com/lsils/ASPDAC2021_exp - and copy the folder ASPDAC2021_exp/experiments/benchmarks_aqfp/ to the build path of mockturtle. 
*/ -int main() -{ - using namespace experiments; - using namespace mockturtle; - - experiment - exp( "buffer_insertion_iwls", "benchmark", "#gates", "depth", "ASAP", "ALAP", "opt", "depth_JJ" ); - - static const std::string benchmarks_aqfp[] = { - /*"5xp1",*/ "c1908", "c432", "c5315", "c880", "chkn", "count", "dist", "in5", "in6", "k2", - "m3", "max512", "misex3", "mlp4", "prom2", "sqr6", "x1dn" }; - - for ( auto const& benchmark : benchmarks_aqfp ) - { - uint32_t b_ASAP, b_ALAP, b_OPT; - fmt::print( "[i] processing {}\n", benchmark ); - mig_network mig; - if ( lorina::read_verilog( "benchmarks_aqfp/" + benchmark + ".v", verilog_reader( mig ) ) != lorina::return_code::success ) - return -1; - - buffer_insertion_params ps; - ps.optimization_effort = buffer_insertion_params::until_sat; - ps.assume.splitter_capacity = 3u; - ps.assume.branch_pis = true; - ps.assume.balance_pis = false; - ps.assume.balance_pos = false; - - buffer_insertion aqfp( mig, ps ); - - aqfp.ASAP(); - aqfp.count_buffers(); - b_ASAP = aqfp.num_buffers(); - - aqfp.ALAP(); - aqfp.count_buffers(); - b_ALAP = aqfp.num_buffers(); - - if ( b_ALAP > b_ASAP ) - { - aqfp.ASAP(); // UNDO ALAP - aqfp.count_buffers(); - } - - aqfp.optimize(); - aqfp.count_buffers(); - b_OPT = aqfp.num_buffers(); - - buffered_mig_network bufntk; - aqfp.dump_buffered_network( bufntk ); - depth_view d_buf{ bufntk }; - assert( verify_aqfp_buffer( bufntk, ps.assume ) ); - - depth_view d{ mig }; - exp( benchmark, mig.num_gates(), d.depth(), b_ASAP, b_ALAP, b_OPT, d_buf.depth() ); - } - - exp.save(); - exp.table(); - - return 0; -} \ No newline at end of file diff --git a/experiments/buffer_insertion_iwls.json b/experiments/buffer_insertion_iwls.json deleted file mode 100644 index c6d367ed0..000000000 --- a/experiments/buffer_insertion_iwls.json +++ /dev/null @@ -1,160 +0,0 @@ -[ - { - "entries": [ - { - "#gates": 381, - "ALAP": 2910, - "ASAP": 2605, - "benchmark": "c1908", - "depth": 38, - "depth_JJ": 62, - "opt": 2202 - }, - { - 
"#gates": 174, - "ALAP": 1891, - "ASAP": 2423, - "benchmark": "c432", - "depth": 44, - "depth_JJ": 65, - "opt": 1673 - }, - { - "#gates": 1270, - "ALAP": 4197, - "ASAP": 6409, - "benchmark": "c5315", - "depth": 33, - "depth_JJ": 56, - "opt": 3574 - }, - { - "#gates": 300, - "ALAP": 1448, - "ASAP": 1854, - "benchmark": "c880", - "depth": 28, - "depth_JJ": 40, - "opt": 1238 - }, - { - "#gates": 421, - "ALAP": 785, - "ASAP": 1536, - "benchmark": "chkn", - "depth": 28, - "depth_JJ": 34, - "opt": 715 - }, - { - "#gates": 119, - "ALAP": 343, - "ASAP": 639, - "benchmark": "count", - "depth": 18, - "depth_JJ": 24, - "opt": 286 - }, - { - "#gates": 535, - "ALAP": 791, - "ASAP": 1066, - "benchmark": "dist", - "depth": 16, - "depth_JJ": 24, - "opt": 761 - }, - { - "#gates": 443, - "ALAP": 814, - "ASAP": 1278, - "benchmark": "in5", - "depth": 19, - "depth_JJ": 27, - "opt": 746 - }, - { - "#gates": 370, - "ALAP": 674, - "ASAP": 1002, - "benchmark": "in6", - "depth": 17, - "depth_JJ": 23, - "opt": 621 - }, - { - "#gates": 1955, - "ALAP": 3812, - "ASAP": 4512, - "benchmark": "k2", - "depth": 25, - "depth_JJ": 37, - "opt": 3249 - }, - { - "#gates": 411, - "ALAP": 611, - "ASAP": 761, - "benchmark": "m3", - "depth": 13, - "depth_JJ": 19, - "opt": 567 - }, - { - "#gates": 713, - "ALAP": 1081, - "ASAP": 1361, - "benchmark": "max512", - "depth": 17, - "depth_JJ": 26, - "opt": 1028 - }, - { - "#gates": 1532, - "ALAP": 2983, - "ASAP": 4113, - "benchmark": "misex3", - "depth": 24, - "depth_JJ": 34, - "opt": 2811 - }, - { - "#gates": 462, - "ALAP": 645, - "ASAP": 839, - "benchmark": "mlp4", - "depth": 16, - "depth_JJ": 23, - "opt": 603 - }, - { - "#gates": 3477, - "ALAP": 5435, - "ASAP": 6777, - "benchmark": "prom2", - "depth": 22, - "depth_JJ": 33, - "opt": 5259 - }, - { - "#gates": 138, - "ALAP": 225, - "ASAP": 287, - "benchmark": "sqr6", - "depth": 13, - "depth_JJ": 17, - "opt": 200 - }, - { - "#gates": 152, - "ALAP": 399, - "ASAP": 453, - "benchmark": "x1dn", - "depth": 14, - 
"depth_JJ": 19, - "opt": 362 - } - ], - "version": "5becf81" - } -] diff --git a/include/mockturtle/algorithms/aqfp/aqfp_assumptions.hpp b/include/mockturtle/algorithms/aqfp/aqfp_assumptions.hpp index fe29ebe46..b8f22e3d7 100644 --- a/include/mockturtle/algorithms/aqfp/aqfp_assumptions.hpp +++ b/include/mockturtle/algorithms/aqfp/aqfp_assumptions.hpp @@ -35,24 +35,104 @@ namespace mockturtle { +/*! \brief More realistic AQFP technology assumptions. */ +struct aqfp_assumptions_realistic +{ + /*! \brief Whether CIs and COs need to be path-balanced. */ + bool balance_cios{ false }; + + /*! \brief Ignores the complementations of COs because they can be merged into register inputs. */ + bool ignore_co_negation{ true }; + + /*! \brief Number of phases per clock cycle (for phase alignment). + * + * Each CO (a node with external reference) must be scheduled at a level being a multiple of + * `num_phases` (i.e., an imaginary CO node should be placed at a level `num_phases * k + 1`). + */ + uint32_t num_phases{ 4u }; + + /*! \brief The maximum number of fanouts a splitter/buffer can have. */ + uint32_t splitter_capacity{ 3u }; + + /*! \brief The maximum number of fanouts a mega splitter can have. */ + //uint32_t mega_splitter_capacity{ 7u }; + + /*! \brief The maximum number of fanouts a CI can have. */ + uint32_t ci_capacity{ 1u }; // simplicity + //uint32_t ci_capacity{ 2u }; // best possible + + /*! \brief The phase offsets (after a change in register input) when new register output is available. + * + * Assumes that the register inputs (D and E) are scheduled at phase 0 (i.e., the last phase of + * the previous clock cycle), a new state is available to be taken at these numbers of phases + * afterwards. + * + * An ascending order is assumed. At least one element should be given. + * + * Each CI must be scheduled at a level `num_phases * k + ci_phases[i]` (for any `i`; for any + * integer `k >= 0` when `balance_cios = false`, or `k=0` otherwise). 
+ */ + std::vector ci_phases{ { 4u } }; // simplicity + //std::vector ci_phases{ { 3u, 4u, 5u } }; // best possible + + /*! \brief Maximum phase-skip (in consideration of clock skew). */ + uint32_t max_phase_skip{ 4u }; +}; + /*! \brief AQFP technology assumptions. * * POs count toward the fanout sizes and always have to be branched. * If PIs need to be balanced, then they must also need to be branched. */ -struct aqfp_assumptions +struct aqfp_assumptions_legacy { /*! \brief Whether PIs need to be branched with splitters. */ - bool branch_pis{ false }; + bool branch_pis{ true }; /*! \brief Whether PIs need to be path-balanced. */ bool balance_pis{ false }; /*! \brief Whether POs need to be path-balanced. */ - bool balance_pos{ true }; + bool balance_pos{ false }; /*! \brief The maximum number of fanouts each splitter (buffer) can have. */ uint32_t splitter_capacity{ 3u }; }; +using aqfp_assumptions = aqfp_assumptions_legacy; + +/* Temporary helper function to bridge old and new code. */ +inline aqfp_assumptions_realistic legacy_to_realistic( aqfp_assumptions_legacy const& legacy ) +{ + aqfp_assumptions_realistic realistic; + + if ( !legacy.branch_pis ) + { + realistic.ci_capacity = std::numeric_limits::max(); + } + else + { + realistic.ci_capacity = 1u; + } + + if ( legacy.balance_pis && legacy.balance_pos ) + { + realistic.balance_cios = true; + } + else if ( !legacy.balance_pis && !legacy.balance_pos ) + { + realistic.balance_cios = false; + } + else + { + std::cerr << "[e] Cannot convert this combination of assumptions.\n"; + } + + realistic.splitter_capacity = legacy.splitter_capacity; + realistic.num_phases = 1u; // no phase alignment + realistic.ci_phases = {0u}; // PIs at level 0 + realistic.max_phase_skip = std::numeric_limits::max(); // no clock skew issue + return realistic; +} + } // namespace mockturtle diff --git a/include/mockturtle/algorithms/aqfp/aqfp_rebuild.hpp b/include/mockturtle/algorithms/aqfp/aqfp_rebuild.hpp index 657ca2e99..880d7c22b 100644
--- a/include/mockturtle/algorithms/aqfp/aqfp_rebuild.hpp +++ b/include/mockturtle/algorithms/aqfp/aqfp_rebuild.hpp @@ -109,29 +109,24 @@ class aqfp_reconstruct_impl /* compute the node level on the new network */ node_map levels( clean_ntk ); + _ntk.foreach_gate( [&]( auto const& n ) { + levels[old2new[n]] = ntk_level.level( n ); + } ); - if ( _ps.buffer_insertion_ps.assume.branch_pis ) + uint32_t max_po_level = 0; + clean_ntk.foreach_po( [&]( auto const& f ){ + uint32_t spl = std::ceil( std::log( clean_ntk.fanout_size( clean_ntk.get_node( f ) ) ) / std::log( _ps.buffer_insertion_ps.assume.splitter_capacity ) ); + max_po_level = std::max( max_po_level, levels[f] + spl ); + }); + std::vector po_levels; + for ( auto i = 0u; i < _ntk.num_pos(); ++i ) { - /* gates are in a fixed position */ - _ntk.foreach_gate( [&]( auto const& n ) { - levels[old2new[n]] = ntk_level.level( n ); - } ); - } - else - { - /* gates are not in a fixed position */ - /* gates are scheduled ALAP */ - - /* if not balance POs, POs are scheduled ASAP */ - auto const levels_guess = schedule_buffered_network( _ntk, _ps.buffer_insertion_ps.assume ); - _ntk.foreach_gate( [&]( auto const& n ) { - levels[old2new[n]] = levels_guess[n]; - } ); + po_levels.emplace_back( max_po_level + 1 ); } /* recompute splitter trees and return the new buffered network */ buffered_aqfp_network res; - buffer_insertion buf_inst( clean_ntk, levels, _ps.buffer_insertion_ps ); + buffer_insertion buf_inst( clean_ntk, levels, po_levels, _ps.buffer_insertion_ps ); _st.num_buffers = buf_inst.run( res ); return res; } diff --git a/include/mockturtle/algorithms/aqfp/aqfp_retiming.hpp b/include/mockturtle/algorithms/aqfp/aqfp_retiming.hpp index 651af47f9..b7ff4ef94 100644 --- a/include/mockturtle/algorithms/aqfp/aqfp_retiming.hpp +++ b/include/mockturtle/algorithms/aqfp/aqfp_retiming.hpp @@ -140,7 +140,7 @@ class aqfp_retiming_impl rps.iterations = 1; buffer_insertion_params buf_ps; - buf_ps.assume = _ps.aqfp_assumptions_ps; + 
buf_ps.assume = legacy_to_realistic( _ps.aqfp_assumptions_ps ); buf_ps.scheduling = buffer_insertion_params::provided; buf_ps.optimization_effort = buffer_insertion_params::none; aqfp_reconstruct_params reconstruct_ps; diff --git a/include/mockturtle/algorithms/aqfp/buffer_insertion.hpp b/include/mockturtle/algorithms/aqfp/buffer_insertion.hpp index 793a3f87e..52a2a92ad 100644 --- a/include/mockturtle/algorithms/aqfp/buffer_insertion.hpp +++ b/include/mockturtle/algorithms/aqfp/buffer_insertion.hpp @@ -57,7 +57,7 @@ namespace mockturtle struct buffer_insertion_params { /*! \brief Technology assumptions. */ - aqfp_assumptions assume; + aqfp_assumptions_realistic assume; /*! \brief The scheduling strategy to get the initial depth assignment. * - `provided` = An initial level assignment is given in the constructor, thus @@ -139,10 +139,6 @@ struct buffer_insertion_params mig_network mig = ... buffer_insertion_params ps; - ps.assume.branch_pis = true; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - ps.assume.splitter_capacity = 3u; ps.scheduling = buffer_insertion_params::ALAP; ps.optimization_effort = buffer_insertion_params::one_pass; @@ -178,7 +174,7 @@ class buffer_insertion using signal = typename Ntk::signal; explicit buffer_insertion( Ntk const& ntk, buffer_insertion_params const& ps = {} ) - : _ntk( ntk ), _ps( ps ), _levels( _ntk ), _timeframes( _ntk ), _fanouts( _ntk ), _external_ref_count( _ntk ), _external_ref_count_neg( _ntk ), _num_buffers( _ntk ), _min_level( _ntk ), _max_level( _ntk ) + : _ntk( ntk ), _ps( ps ), _levels( _ntk ), _po_levels( _ntk.num_pos(), 0u ), _timeframes( _ntk ), _fanouts( _ntk ), _num_buffers( _ntk ) { static_assert( !is_buffered_network_type_v, "Ntk is already buffered" ); static_assert( has_foreach_node_v, "Ntk does not implement the foreach_node method" ); @@ -194,13 +190,15 @@ class buffer_insertion static_assert( has_set_visited_v, "Ntk does not implement the set_visited method" ); static_assert( 
has_set_value_v, "Ntk does not implement the set_value method" ); - assert( !( _ps.assume.balance_pis && !_ps.assume.branch_pis ) && "Does not make sense to balance but not branch PIs" ); assert( _ps.scheduling != buffer_insertion_params::provided ); - initialize_external_ref_counts(); + + // checks for assumptions + assert( _ps.assume.ci_phases.size() > 0 ); + assert( _ps.assume.ignore_co_negation ); // consideration of CO negation is too complicated and neglected for now } - explicit buffer_insertion( Ntk const& ntk, node_map const& levels, buffer_insertion_params const& ps = {} ) - : _ntk( ntk ), _ps( ps ), _levels( levels ), _timeframes( _ntk ), _fanouts( _ntk ), _external_ref_count( _ntk ), _external_ref_count_neg( _ntk ), _num_buffers( _ntk ), _min_level( _ntk ), _max_level( _ntk ) + explicit buffer_insertion( Ntk const& ntk, node_map const& levels, std::vector const& po_levels, buffer_insertion_params const& ps = {} ) + : _ntk( ntk ), _ps( ps ), _levels( levels ), _po_levels( po_levels ), _timeframes( _ntk ), _fanouts( _ntk ), _num_buffers( _ntk ) { static_assert( !is_buffered_network_type_v, "Ntk is already buffered" ); static_assert( has_foreach_node_v, "Ntk does not implement the foreach_node method" ); @@ -216,23 +214,41 @@ class buffer_insertion static_assert( has_set_visited_v, "Ntk does not implement the set_visited method" ); static_assert( has_set_value_v, "Ntk does not implement the set_value method" ); - assert( !( _ps.assume.balance_pis && !_ps.assume.branch_pis ) && "Does not make sense to balance but not branch PIs" ); assert( _ps.scheduling == buffer_insertion_params::provided ); - initialize_external_ref_counts(); + assert( _po_levels.size() == _ntk.num_pos() ); + } + + /*! \brief Insert buffers and obtain a buffered network. 
+ * + * \param bufntk An empty network of an appropriate buffered network type + * to store the buffer-insertion result + * \return The number of buffers in the resulting network + */ + template + uint32_t run( BufNtk& bufntk ) + { + dry_run(); + dump_buffered_network( bufntk ); + return num_buffers(); } /*! \brief Insert buffers and obtain a buffered network. + * + * It is suggested to write the `pi_levels` information into a dumped file + * for easier recovery of the scheduled phase assignment. + * * \param bufntk An empty network of an appropriate buffered network type to * to store the buffer-insertion result - * \param pLevels A pointer to a node map which will store the resulting - * level assignment + * \param pi_lvls A vector which will store the PI level assignment (it is + * recommended to store this information together with the buffered network) * \return The number of buffers in the resulting network */ template - uint32_t run( BufNtk& bufntk, node_map* pLevels = nullptr ) + uint32_t run( BufNtk& bufntk, std::vector& pi_lvls ) { - dry_run( pLevels ); + dry_run(); dump_buffered_network( bufntk ); + pi_lvls = pi_levels(); return num_buffers(); } @@ -242,23 +258,26 @@ class buffer_insertion * allows users to experiment on the algorithms with new network types whose * corresponding buffered_network are not implemented yet. * - * \param pLevels A pointer to a node map which will store the resulting - * level assignment + * The results of `levels()` and `po_levels()` can be used to create another `buffer_insertion` instance of + * the same state (current schedule), which also define a unique buffered network.
(Set + * `ps.scheduling = provided` and `ps.optimization_effort = none`) + * * \return The number of buffers in the resulting network */ - uint32_t dry_run( node_map* pLevels = nullptr ) + uint32_t dry_run() { schedule(); optimize(); count_buffers(); - - if ( pLevels ) - *pLevels = _levels; - return num_buffers(); } #pragma region Query + node_map const& levels() const + { + return _levels; + } + /*! \brief Level of node `n` considering buffer/splitter insertion. */ uint32_t level( node const& n ) const { @@ -266,13 +285,34 @@ class buffer_insertion return _levels[n]; } + std::vector const& po_levels() const + { + return _po_levels; + } + + /*! \brief Level of the `idx`-th PO (imaginary dummy PO node, not counted in depth). */ + uint32_t po_level( uint32_t idx ) const + { + assert( idx < _ntk.num_pos() ); + return _po_levels[idx]; + } + + std::vector pi_levels() const + { + std::vector lvls; + _ntk.foreach_pi( [&]( auto n ){ + lvls.emplace_back( _levels[n] ); + } ); + return lvls; + } + /*! \brief Network depth considering AQFP buffers/splitters. * - * Note that when neither PIs nor POs are balanced, there can be - * different schedulings for the same buffered network (i.e. having - * the same number of buffers), thus this number may be different - * from the depth obtained by dumping the buffered network and wrapping - * depth_view around it. + * Should be equal to `max( po_level(i) - 1 )`. + * + * This is the number of phases from the previous-stage register to the + * next-stage register, including the depth of the previous-stage register + * (i.e., from one register input to the next register input). */ uint32_t depth() const { @@ -286,15 +326,9 @@ class buffer_insertion assert( !_outdated && "Please call `count_buffers()` first." 
); uint32_t count = 0u; - if ( _ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto const& n ) { + _ntk.foreach_node( [&]( auto const& n ) { + if ( !_ntk.is_constant( n ) ) count += num_buffers( n ); - } ); - } - - _ntk.foreach_gate( [&]( auto const& n ) { - count += num_buffers( n ); } ); return count; } @@ -307,8 +341,8 @@ class buffer_insertion return _num_buffers[n]; } - /*! \brief The choosen schedule is ASAP */ - uint32_t is_scheduled_ASAP() const + /*! \brief The chosen schedule is ASAP */ + bool is_scheduled_ASAP() const { return _is_scheduled_ASAP; } @@ -328,21 +362,14 @@ class buffer_insertion update_fanout_info(); } - if ( _ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto const& n ) { - assert( !_ps.assume.balance_pis || _levels[n] == 0 ); + _ntk.foreach_node( [&]( auto const& n ) { + if ( !_ntk.is_constant( n ) ) _num_buffers[n] = count_buffers( n ); - } ); - } - - _ntk.foreach_gate( [&]( auto const& n ) { - _num_buffers[n] = count_buffers( n ); } ); } private: - uint32_t count_buffers( node const& n ) + uint32_t count_buffers( node const& n ) const { assert( !_outdated && "Please call `update_fanout_info()` first." 
); auto const& fo_infos = _fanouts[n]; @@ -356,78 +383,29 @@ class buffer_insertion if ( _ntk.fanout_size( n ) == 1u ) /* single fanout */ { - if ( _external_ref_count[n] == 1u && !_ps.assume.balance_pos ) - return 0u; - else - return fo_infos.front().relative_depth - 1u; - } - - /* special case: don't balance POs; multiple PO refs but no gate fanout */ - if ( !_ps.assume.balance_pos && _ntk.fanout_size( n ) == _external_ref_count[n] ) - { - /* have both polarities */ - if ( _external_ref_count_neg[n] > 0 && _external_ref_count[n] > _external_ref_count_neg[n] ) - return std::ceil( float( _external_ref_count[n] - _external_ref_count_neg[n] - 1 ) / float( _ps.assume.splitter_capacity - 1 ) ) + std::ceil( float( _external_ref_count_neg[n] - 1 ) / float( _ps.assume.splitter_capacity - 1 ) ) + 1; - else - return std::ceil( float( _external_ref_count[n] - 1 ) / float( _ps.assume.splitter_capacity - 1 ) ); + assert( fo_infos.size() == 1u ); + return fo_infos.front().relative_depth - 1u; } - assert( fo_infos.size() > 1u ); - uint32_t count{ 0u }; - - /* special case: don't balance POs; have both gate fanout(s) and PO ref(s) */ - if ( !_ps.assume.balance_pos && _external_ref_count[n] > 0u ) + if ( _ps.assume.ci_capacity > 1 && _ntk.is_pi( n ) ) { - /* hacky (rare?) 
case */ - /*fmt::print( "[w] hacky case: node {} has {} fanouts, including {} gates, {} positive PO refs and {} negative PO refs.\n", - n, _ntk.fanout_size( n ), _ntk.fanout_size( n ) - _external_ref_count[n], - _external_ref_count[n] - _external_ref_count_neg[n], _external_ref_count_neg[n] );*/ - - /* count ignoring POs */ - auto rit = fo_infos.rbegin(); - assert( rit->fanouts.size() == 0 ); - while ( rit->fanouts.size() == 0 ) - ++rit; - auto nedges = rit->fanouts.size(); - auto prev_rd = rit->relative_depth; - for ( ++rit; rit != fo_infos.rend(); ++rit ) - { - nedges = num_splitters( nedges ); - count += nedges + prev_rd - rit->relative_depth - 1; - nedges += rit->fanouts.size(); - prev_rd = rit->relative_depth; - } - assert( nedges == 1 ); - - /* check if available slots in the remaining buffers are enough for POs */ - auto slots = count * ( _ps.assume.splitter_capacity - 1 ) + 1; - int32_t needed_pos = _ntk.fanout_size( n ) - _external_ref_count_neg[n] - slots; - if ( _external_ref_count_neg[n] > 0 ) - ++needed_pos; - if ( needed_pos > 0 ) + if ( fo_infos.size() == 1u ) { - count += std::ceil( float( needed_pos ) / float( _ps.assume.splitter_capacity - 1 ) ); - } - if ( _external_ref_count_neg[n] > 0 ) - { - count += std::max( std::ceil( float( _external_ref_count_neg[n] - 1 ) / float( _ps.assume.splitter_capacity - 1 ) ), 1 ); + assert( fo_infos.front().relative_depth == 1u ); + return 0u; } - - return count; } - /* main counting */ + assert( fo_infos.size() > 1u ); auto it = fo_infos.begin(); - count = it->num_edges; - auto rd = it->relative_depth; + uint32_t count = it->num_edges - it->fanouts.size() - it->extrefs.size(); + uint32_t rd = it->relative_depth; for ( ++it; it != fo_infos.end(); ++it ) { - count += it->num_edges - it->fanouts.size() + it->relative_depth - rd - 1; + count += it->num_edges - it->fanouts.size() - it->extrefs.size() + it->relative_depth - rd - 1; rd = it->relative_depth; } - /* PO refs were added as num_edges and counted as buffers 
*/ - count -= _external_ref_count[n]; return count; } @@ -435,90 +413,37 @@ class buffer_insertion uint32_t num_splitter_levels( node const& n ) const { assert( n < _ntk.size() ); + if ( _ntk.is_pi( n ) ) + { + if ( _ntk.fanout_size( n ) > _ps.assume.ci_capacity ) + return std::ceil( std::log( _ntk.fanout_size( n ) - _ps.assume.ci_capacity + 1 ) / std::log( _ps.assume.splitter_capacity ) ); + else + return 0u; + } return std::ceil( std::log( _ntk.fanout_size( n ) ) / std::log( _ps.assume.splitter_capacity ) ); } - uint32_t num_splitter_levels_po( node const& n ) const - { - assert( n < _ntk.size() ); - if ( _ntk.fanout_size( n ) == 1 ) - return 0; - if ( _external_ref_count_neg[n] == 0 || _external_ref_count_neg[n] == _external_ref_count[n] ) - return std::ceil( std::log( _ntk.fanout_size( n ) ) / std::log( _ps.assume.splitter_capacity ) ); - return std::max( std::ceil( std::log( _ntk.fanout_size( n ) - _external_ref_count_neg[n] ) / std::log( _ps.assume.splitter_capacity ) ), - std::ceil( std::log( _external_ref_count_neg[n] ) / std::log( _ps.assume.splitter_capacity ) ) ) + - 1; // suboptimal - } -#pragma endregion - -private: -#pragma region Update fanout info - void initialize_external_ref_counts() - { - _ntk.foreach_po( [&]( auto const& f ) { - if ( !_ntk.is_constant( _ntk.get_node( f ) ) ) - { - _external_ref_count[f]++; - if ( _ntk.is_complemented( f ) ) - { - _external_ref_count_neg[f]++; - } - } - } ); - } - /* Update fanout_information of all nodes */ void update_fanout_info() { _fanouts.reset(); + _ntk.foreach_gate( [&]( auto const& n ) { _ntk.foreach_fanin( n, [&]( auto const& fi ) { auto const ni = _ntk.get_node( fi ); if ( !_ntk.is_constant( ni ) ) - { insert_fanout( ni, n ); - } } ); } ); - _ntk.foreach_node( [&]( auto const& n ) { - if ( !_ps.assume.branch_pis && _ntk.is_pi( n ) ) - return true; - if ( _external_ref_count[n] > 0u ) - _fanouts[n].push_back( { _depth + 1 - _levels[n], {}, _external_ref_count[n] } ); - return true; - } ); - - /* 
//debugging checks - if ( !_ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto const& n ) { assert( _fanouts[n].size() == 0 ); }); - _ntk.foreach_gate( [&]( auto const& n ) { - if ( _ntk.fanout_size( n ) == 1 ) assert( _fanouts[n].size() == 1 ); - else assert( _fanouts[n].front().relative_depth > 1 ); - }); - } - else - { - _ntk.foreach_node( [&]( auto const& n ) { - if ( _ntk.is_constant( n ) || _ntk.fanout_size( n ) == 0 ) return true; - if ( _ntk.fanout_size( n ) == 1 ) assert( _fanouts[n].size() == 1 ); - else assert( _fanouts[n].front().relative_depth > 1 ); - return true; - }); - } - */ - - _ntk.foreach_gate( [&]( auto const& n ) { - count_edges( n ); + _ntk.foreach_po( [&]( auto const& f, auto i ){ + insert_extref( _ntk.get_node( f ), i ); } ); - if ( _ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto const& n ) { + _ntk.foreach_node( [&]( auto const& n ) { + if ( !_ntk.is_constant( n ) ) count_edges( n ); - } ); - } + } ); _outdated = false; } @@ -528,6 +453,7 @@ class buffer_insertion bool update_fanout_info( node const& n ) { std::vector fos; + std::vector extrefs; for ( auto it = _fanouts[n].begin(); it != _fanouts[n].end(); ++it ) { if ( it->fanouts.size() ) @@ -535,24 +461,26 @@ class buffer_insertion for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) fos.push_back( *it2 ); } + if ( it->extrefs.size() ) + { + for ( auto it2 = it->extrefs.begin(); it2 != it->extrefs.end(); ++it2 ) + extrefs.push_back( *it2 ); + } } _fanouts[n].clear(); for ( auto& fo : fos ) insert_fanout( n, fo ); - - if ( _external_ref_count[n] > 0u ) - _fanouts[n].push_back( { _depth + 1 - _levels[n], {}, _external_ref_count[n] } ); + for ( auto& po : extrefs ) + insert_extref( n, po ); return count_edges( n ); } void insert_fanout( node const& n, node const& fanout ) { - if ( !_ps.assume.branch_pis && _ntk.is_pi( n ) ) - return; + assert( _levels[fanout] > _levels[n] ); auto const rd = _levels[fanout] - _levels[n]; - assert( rd > 0 ); auto& fo_infos = 
_fanouts[n]; for ( auto it = fo_infos.begin(); it != fo_infos.end(); ++it ) { @@ -564,11 +492,33 @@ class buffer_insertion } else if ( it->relative_depth > rd ) { - fo_infos.insert( it, { rd, { fanout }, 1u } ); + fo_infos.insert( it, { rd, { fanout }, {}, 1u } ); + return; + } + } + fo_infos.push_back( { rd, { fanout }, {}, 1u } ); + } + + void insert_extref( node const& n, uint32_t idx ) + { + assert( _po_levels[idx] > _levels[n] ); + auto const rd = _po_levels[idx] - _levels[n]; + auto& fo_infos = _fanouts[n]; + for ( auto it = fo_infos.begin(); it != fo_infos.end(); ++it ) + { + if ( it->relative_depth == rd ) + { + it->extrefs.push_back( idx ); + ++it->num_edges; + return; + } + else if ( it->relative_depth > rd ) + { + fo_infos.insert( it, { rd, {}, {idx}, 1u } ); return; } } - fo_infos.push_back( { rd, { fanout }, 1u } ); + fo_infos.push_back( { rd, {}, {idx}, 1u } ); } template @@ -580,41 +530,44 @@ class buffer_insertion { return true; } - assert( fo_infos.front().relative_depth > 1u ); - fo_infos.push_front( { 1u, {}, 0u } ); + + if ( _ntk.is_pi( n ) && _ps.assume.ci_capacity > 1 ) + { + if ( fo_infos.front().relative_depth > 1u ) + fo_infos.push_front( { 1u, {}, {}, 0u } ); + } + else + { + assert( fo_infos.front().relative_depth > 1u ); + fo_infos.push_front( { 1u, {}, {}, 0u } ); + } auto it = fo_infos.end(); --it; uint32_t splitters; - if ( _external_ref_count_neg[n] > 0 ) - splitters = num_splitters_po( _external_ref_count[n] - _external_ref_count_neg[n], _external_ref_count_neg[n] ); - else - splitters = num_splitters( it->num_edges ); - while ( it != fo_infos.begin() ) { + splitters = num_splitters( it->num_edges ); auto rd = it->relative_depth; --it; if ( it->relative_depth < rd - 1 && splitters > 1 ) { - ++it; - it = fo_infos.insert( it, { rd - 1, {}, splitters } ); + it = fo_infos.insert( ++it, { rd - 1, {}, {}, splitters } ); } else { it->num_edges += splitters; } - splitters = num_splitters( it->num_edges ); } assert( 
fo_infos.front().relative_depth == 1u ); if constexpr ( verify ) { - return fo_infos.front().num_edges == 1u; + return _ntk.is_pi( n ) ? fo_infos.front().num_edges <= _ps.assume.ci_capacity : fo_infos.front().num_edges == 1u; } else { - assert( fo_infos.front().num_edges == 1u ); + assert( _ntk.is_pi( n ) ? fo_infos.front().num_edges <= _ps.assume.ci_capacity : fo_infos.front().num_edges == 1u ); return true; } } @@ -624,34 +577,51 @@ class buffer_insertion { return std::ceil( float( num_fanouts ) / float( _ps.assume.splitter_capacity ) ); } - - uint32_t num_splitters_po( uint32_t num_positive, uint32_t num_negative ) const - { - return std::ceil( float( num_positive ) / float( _ps.assume.splitter_capacity ) ) + std::ceil( float( num_negative ) / float( _ps.assume.splitter_capacity ) ); - } #pragma endregion -#pragma region Level assignment +#pragma region Initial level assignment public: /*! \brief Obtain the initial level assignment using the specified scheduling policy */ void schedule() { if ( _ps.scheduling == buffer_insertion_params::provided ) { - _ntk.foreach_po( [&]( auto const& f ) { - _depth = std::max( _depth, _levels[f] + num_splitter_levels( _ntk.get_node( f ) ) ); + _ntk.foreach_po( [&]( auto const& f, auto i ) { + assert( _po_levels[i] > _levels[f] ); + _depth = std::max( _depth, _po_levels[i] - 1 ); } ); + assert( _depth % _ps.assume.num_phases == 0 ); + return; } - else if ( _ps.scheduling == buffer_insertion_params::better_depth || _ps.scheduling == buffer_insertion_params::ASAP_depth || _ps.scheduling == buffer_insertion_params::ALAP_depth ) + + if ( _ps.scheduling == buffer_insertion_params::better_depth || _ps.scheduling == buffer_insertion_params::ASAP_depth || _ps.scheduling == buffer_insertion_params::ALAP_depth ) { fanout_view f_ntk{ _ntk }; - depth_optimal_schedule( f_ntk ); - } - else - { - ASAP(); + /* Optimum-depth ALAP scheduling */ + ALAP_depth( f_ntk ); + count_buffers(); + auto const num_buf_ALAP_depth = num_buffers(); + + if ( 
_ps.scheduling == buffer_insertion_params::ALAP_depth ) + return; + + /* Optimum-depth ALAP scheduling: no balanced trees */ + ASAP_depth( f_ntk, false ); + count_buffers(); + auto const num_buf_ASAP_depth = num_buffers(); + + if ( _ps.scheduling == buffer_insertion_params::ASAP_depth ) + return; + + /* Revert to optimum-depth ALAP scheduling if better */ + if ( num_buf_ALAP_depth < num_buf_ASAP_depth ) + { + ALAP_depth( f_ntk ); + } + return; } + ASAP(); if ( _ps.scheduling == buffer_insertion_params::ALAP ) { ALAP(); @@ -676,10 +646,29 @@ class buffer_insertion _levels.reset( 0 ); _ntk.incr_trav_id(); - _ntk.foreach_po( [&]( auto const& f ) { + _ntk.foreach_po( [&]( auto const& f, auto i ) { auto const no = _ntk.get_node( f ); - auto clevel = compute_levels_ASAP( no ) + num_splitter_levels_po( no ); - _depth = std::max( _depth, clevel ); + _po_levels[i] = compute_levels_ASAP( no ) + num_splitter_levels( no ) + 1; + if ( ( _po_levels[i] - 1 ) % _ps.assume.num_phases != 0 ) // phase alignment + { + _po_levels[i] += _ps.assume.num_phases - ( ( _po_levels[i] - 1 ) % _ps.assume.num_phases ); + } + _depth = std::max( _depth, _po_levels[i] - 1 ); + } ); + assert( _depth % _ps.assume.num_phases == 0 ); + + if ( _ps.assume.balance_cios ) + { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + (void)f; + _po_levels[i] = _depth + 1; + } ); + } + + /* dangling PIs */ + _ntk.foreach_pi( [&]( auto const& n ){ + if ( _ntk.visited( n ) != _ntk.trav_id() ) + _levels[n] = _ps.assume.ci_phases[0]; } ); _outdated = true; @@ -694,22 +683,45 @@ class buffer_insertion */ void ASAP_depth( fanout_view const& f_ntk, bool try_regular ) { - node_map mobility( _ntk, UINT32_MAX ); + node_map mobility( _ntk, std::numeric_limits::max() ); - _ntk.foreach_node( [&]( auto const& n ) { - if ( _ntk.is_constant( n ) || _ntk.is_pi( n ) ) - { - mobility[n] = _levels[n]; - } + if ( !_ps.assume.balance_cios ) + { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + (void)f; + _po_levels[i] = 0; + } ); + } 
+ _ntk.foreach_pi( [&]( auto const& n ) { if ( !_ntk.is_constant( n ) ) { + mobility[n] = _levels[n] - _ps.assume.ci_phases[0]; compute_mobility_ASAP( f_ntk, n, mobility, try_regular ); } + } ); - _min_level[n] = _levels[n]; + _ntk.foreach_gate( [&]( auto const& n ) { + compute_mobility_ASAP( f_ntk, n, mobility, try_regular ); } ); + if ( !_ps.assume.balance_cios ) + { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + if ( _po_levels[i] == 0 ) + { + assert( _ntk.is_constant( _ntk.get_node( f ) ) ); + _po_levels[i] = 1; + } + else if ( ( _po_levels[i] - 1 ) % _ps.assume.num_phases != 0 ) // phase alignment + { + _po_levels[i] += _ps.assume.num_phases - ( ( _po_levels[i] - 1 ) % _ps.assume.num_phases ); + } + _depth = std::max( _depth, _po_levels[i] - 1 ); + } ); + assert( _depth % _ps.assume.num_phases == 0 ); + } + _outdated = true; _is_scheduled_ASAP = true; } @@ -720,18 +732,27 @@ class buffer_insertion */ void ALAP() { + assert( _depth % _ps.assume.num_phases == 0 ); _levels.reset( 0 ); _ntk.incr_trav_id(); - _ntk.foreach_po( [&]( auto const& f ) { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + _po_levels[i] = _depth + 1; const auto n = _ntk.get_node( f ); - if ( !_ntk.is_constant( n ) && _ntk.visited( n ) != _ntk.trav_id() && ( !_ps.assume.balance_pis || !_ntk.is_pi( n ) ) ) + + if ( !_ntk.is_constant( n ) && _ntk.visited( n ) != _ntk.trav_id() ) { - _levels[n] = _depth - num_splitter_levels_po( n ); + _levels[n] = _depth - num_splitter_levels( n ); compute_levels_ALAP( n ); } } ); + /* dangling PIs */ + _ntk.foreach_pi( [&]( auto const& n ){ + if ( _ntk.visited( n ) != _ntk.trav_id() ) + _levels[n] = _ps.assume.ci_phases[0]; + } ); + _outdated = true; _is_scheduled_ASAP = false; } @@ -743,65 +764,102 @@ class buffer_insertion topo_view topo_ntk{ _ntk }; /* compute ALAP */ - _depth = UINT32_MAX - 1; - uint32_t min_level = UINT32_MAX - 1; + _depth = std::numeric_limits::max() - 1; + uint32_t min_level = std::numeric_limits::max() - 1; 
topo_ntk.foreach_node_reverse( [&]( auto const& n ) { - if ( !_ntk.is_constant( n ) && ( _ps.assume.branch_pis || !_ntk.is_pi( n ) ) ) + if ( !_ntk.is_constant( n ) && _ntk.fanout_size( n ) > 0 ) { compute_levels_ALAP_depth( f_ntk, n ); min_level = std::min( min_level, _levels[n] ); } } ); - if ( !_ps.assume.branch_pis && min_level != 0 ) - --min_level; + /* move everything down by `delta` */ + uint32_t delta = min_level; + /* phase alignment for PO: depth % num_phases = 0 */ + if ( ( _depth - delta ) % _ps.assume.num_phases != 0 ) + { + delta -= _ps.assume.num_phases - ( ( _depth - delta ) % _ps.assume.num_phases ); + } - /* normalize level */ - _ntk.foreach_node( [&]( auto const& n ) { - if ( !_ntk.is_constant( n ) ) - { - if ( _ps.assume.balance_pis && _ntk.is_pi( n ) ) + /* level of the lowest PI >= ci_phases[0] */ + while ( min_level - delta < _ps.assume.ci_phases[0] ) + { + delta -= _ps.assume.num_phases; + } + /* move PIs down to an acceptable level */ + if ( _ps.assume.balance_cios ) + { + _ntk.foreach_pi( [&]( auto const& n ) { + if ( _ntk.fanout_size( n ) == 0 ) { - _levels[n] = 0; + _levels[n] = _ps.assume.ci_phases[0]; } - else if ( !_ps.assume.balance_pis || !_ntk.is_pi( n ) ) + else if ( !_ntk.is_constant( n ) ) { - _levels[n] = _levels[n] - min_level; + _levels[n] = _levels[n] - delta; + for ( auto rit = _ps.assume.ci_phases.rbegin(); rit != _ps.assume.ci_phases.rend(); ++rit ) + { + if ( *rit <= _levels[n] ) + { + _levels[n] = *rit; + return; + } + } + assert( false ); } - _max_level[n] = _levels[n]; - } + } ); + } + else + { + _ntk.foreach_pi( [&]( auto const& n ) { + if ( _ntk.fanout_size( n ) == 0 ) + { + _levels[n] = _ps.assume.ci_phases[0]; + } + else if ( !_ntk.is_constant( n ) ) + { + _levels[n] = _levels[n] - delta; + while ( !is_acceptable_ci_lvl( _levels[n] ) ) + { + assert( _levels[n] > 0 ); + --_levels[n]; + } + } + } ); + } + + _ntk.foreach_gate( [&]( auto const& n ) { + _levels[n] = _levels[n] - delta; } ); + _depth -= delta; + assert( 
_depth % _ps.assume.num_phases == 0 ); + if ( _ps.assume.balance_cios ) + { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + (void)f; + _po_levels[i] = _depth + 1; + } ); + } + else + { + _ntk.foreach_po( [&]( auto const& f, auto i ) { + if ( _ntk.is_constant( _ntk.get_node( f ) ) ) + _po_levels[i] = 1; + else + { + _po_levels[i] = _levels[f] + num_splitter_levels( _ntk.get_node( f ) ); + if ( _po_levels[i] % _ps.assume.num_phases > 0 ) + _po_levels[i] += _ps.assume.num_phases - ( _po_levels[i] % _ps.assume.num_phases ); + ++_po_levels[i]; + } + } ); + } - _depth -= min_level; _outdated = true; _is_scheduled_ASAP = false; } - void depth_optimal_schedule( fanout_view const& f_ntk ) - { - /* Optimum-depth ALAP scheduling */ - ALAP_depth( f_ntk ); - count_buffers(); - auto const num_buf_ALAP_depth = num_buffers(); - - if ( _ps.scheduling == buffer_insertion_params::ALAP_depth ) - return; - - /* Optimum-depth ALAP scheduling: no balanced trees */ - ASAP_depth( f_ntk, false ); - count_buffers(); - auto const num_buf_ASAP_depth = num_buffers(); - - if ( _ps.scheduling == buffer_insertion_params::ASAP_depth ) - return; - - /* Revert to optimum-depth ALAP scheduling if better */ - if ( num_buf_ALAP_depth < num_buf_ASAP_depth ) - { - ALAP_depth( f_ntk ); - } - } - private: uint32_t compute_levels_ASAP( node const& n ) { @@ -811,21 +869,21 @@ class buffer_insertion } _ntk.set_visited( n, _ntk.trav_id() ); - if ( _ntk.is_constant( n ) || _ntk.is_pi( n ) ) + if ( _ntk.is_constant( n ) ) { return _levels[n] = 0; } + else if ( _ntk.is_pi( n ) ) + { + return _levels[n] = _ps.assume.ci_phases[0]; + } uint32_t level{ 0 }; _ntk.foreach_fanin( n, [&]( auto const& fi ) { auto const ni = _ntk.get_node( fi ); if ( !_ntk.is_constant( ni ) ) { - auto fi_level = compute_levels_ASAP( ni ); - if ( _ps.assume.branch_pis || !_ntk.is_pi( ni ) ) - { - fi_level += num_splitter_levels( ni ); - } + auto fi_level = compute_levels_ASAP( ni ) + num_splitter_levels( ni ); level = std::max( level, 
fi_level ); } } ); @@ -833,29 +891,70 @@ class buffer_insertion return _levels[n] = level + 1; } - void compute_levels_ALAP( node const& n ) + bool is_acceptable_ci_lvl( uint32_t lvl ) const { - _ntk.set_visited( n, _ntk.trav_id() ); - - _ntk.foreach_fanin( n, [&]( auto const& fi ) { - auto const ni = _ntk.get_node( fi ); - if ( !_ntk.is_constant( ni ) ) + if ( _ps.assume.balance_cios ) + { + for ( auto const& p : _ps.assume.ci_phases ) { - if ( _ps.assume.balance_pis && _ntk.is_pi( ni ) ) - { - assert( _levels[n] > 0 ); - _levels[ni] = 0; - } - else if ( _ps.assume.branch_pis || !_ntk.is_pi( ni ) ) - { - assert( _levels[n] > num_splitter_levels( ni ) ); - auto fi_level = _levels[n] - num_splitter_levels( ni ) - 1; - if ( _ntk.visited( ni ) != _ntk.trav_id() || _levels[ni] > fi_level ) - { - _levels[ni] = fi_level; - compute_levels_ALAP( ni ); - } - } + if ( lvl == p ) + return true; + } + return false; + } + else + { + for ( auto const& p : _ps.assume.ci_phases ) + { + // for example, if num_phases = 4, ci_phases = {5}, + // then lvl = 1 will not be acceptable, but lvl = 5 or lvl = 9 will + if ( lvl % _ps.assume.num_phases == p % _ps.assume.num_phases && lvl >= p ) + return true; + } + return false; + } + } + + void compute_levels_ALAP( node const& n ) + { + _ntk.set_visited( n, _ntk.trav_id() ); + + if ( _ntk.is_pi( n ) ) + { + if ( _ps.assume.balance_cios ) + { + for ( auto rit = _ps.assume.ci_phases.rbegin(); rit != _ps.assume.ci_phases.rend(); ++rit ) + { + if ( *rit <= _levels[n] ) + { + _levels[n] = *rit; + return; + } + } + assert( false ); + } + else + { + while ( !is_acceptable_ci_lvl( _levels[n] ) ) + { + assert( _levels[n] > 0 ); + --_levels[n]; + } + } + return; + } + + _ntk.foreach_fanin( n, [&]( auto const& fi ) { + auto const ni = _ntk.get_node( fi ); + if ( !_ntk.is_constant( ni ) ) + { + assert( _levels[n] > num_splitter_levels( ni ) ); + auto fi_level = _levels[n] - num_splitter_levels( ni ) - 1; + if ( _ntk.visited( ni ) != _ntk.trav_id() || 
_levels[ni] > fi_level ) + { + _levels[ni] = fi_level; + compute_levels_ALAP( ni ); + } } } ); } @@ -874,13 +973,6 @@ class buffer_insertion level_assignment.push_back( _levels[f] ); } ); - /* dangling PI */ - if ( level_assignment.empty() ) - { - _levels[n] = _depth; - return; - } - /* sort by descending order of levels */ std::sort( level_assignment.begin(), level_assignment.end(), std::greater() ); @@ -906,10 +998,21 @@ class buffer_insertion /* search for a feasible level for node n */ --last_level; - while ( nodes_in_level > 1 ) + if ( _ntk.is_pi( n ) ) { - nodes_in_level = std::ceil( float( nodes_in_level ) / float( _ps.assume.splitter_capacity ) ); - --last_level; + while ( nodes_in_level > _ps.assume.ci_capacity ) + { + nodes_in_level = std::ceil( float( nodes_in_level ) / float( _ps.assume.splitter_capacity ) ); + --last_level; + } + } + else + { + while ( nodes_in_level > 1 ) + { + nodes_in_level = std::ceil( float( nodes_in_level ) / float( _ps.assume.splitter_capacity ) ); + --last_level; + } } _levels[n] = last_level; @@ -917,18 +1020,11 @@ class buffer_insertion void compute_mobility_ASAP( fanout_view const& ntk, node const& n, node_map& mobility, bool try_regular ) { + assert( mobility[n] <= _levels[n] ); /* commit ASAP scheduling */ uint32_t level_n = _levels[n] - mobility[n]; _levels[n] = level_n; - if ( !_ps.assume.branch_pis && _ntk.is_pi( n ) ) - { - ntk.foreach_fanout( n, [&]( auto const& f ) { - mobility[f] = std::min( mobility[f], _levels[f] - level_n - 1 ); - } ); - return; - } - /* try to fit a balanced tree */ if ( try_regular ) { @@ -954,8 +1050,14 @@ class buffer_insertion level_assignment.reserve( _ntk.fanout_size( n ) ); /* if node is a PO, add levels */ - for ( auto i = ntk.fanout( n ).size(); i < ntk.fanout_size( n ); ++i ) - level_assignment.push_back( { 0, _depth + 1, 0 } ); + if ( ntk.fanout( n ).size() < ntk.fanout_size( n ) ) + { + ntk.foreach_po( [&]( auto const& f, auto i ){ + if ( ntk.get_node( f ) == n ) + 
level_assignment.push_back( { i, _depth + 1, 0 } ); + } ); + assert( level_assignment.size() == ntk.fanout_size( n ) - ntk.fanout( n ).size() ); + } /* get fanout levels */ ntk.foreach_fanout( n, [&]( auto const& f ) { @@ -1012,7 +1114,7 @@ class buffer_insertion uint32_t mobility_update = 0; for ( auto i = level_n + 1; i < last_level; ++i ) { - if ( nodes_in_level == 1 ) + if ( nodes_in_level == 1 || ( _ntk.is_pi( n ) && nodes_in_level <= _ps.assume.ci_capacity ) ) ++mobility_update; nodes_in_level = std::ceil( float( nodes_in_level ) / float( _ps.assume.splitter_capacity ) ); } @@ -1020,98 +1122,29 @@ class buffer_insertion /* update mobilities */ for ( auto const& v : level_assignment ) { - if ( v[0] != 0 ) + if ( v[1] != _depth + 1 ) { mobility[v[0]] = std::min( mobility[v[0]], v[2] + mobility_update ); } } - } -#pragma endregion -#pragma region Compute timeframe - /*! \brief Compute the earliest and latest possible timeframe by eager ASAP and ALAP */ - uint32_t compute_timeframe( uint32_t max_depth ) - { - _timeframes.reset( std::make_pair( 0, 0 ) ); - uint32_t min_depth{ 0 }; - - _ntk.incr_trav_id(); - _ntk.foreach_po( [&]( auto const& f ) { - auto const no = _ntk.get_node( f ); - auto clevel = compute_levels_ASAP_eager( no ) + ( _ntk.fanout_size( no ) > 1 ? 1 : 0 ); - min_depth = std::max( min_depth, clevel ); - } ); - - _ntk.incr_trav_id(); - _ntk.foreach_po( [&]( auto const& f ) { - const auto n = _ntk.get_node( f ); - if ( !_ntk.is_constant( n ) && _ntk.visited( n ) != _ntk.trav_id() && ( !_ps.assume.balance_pis || !_ntk.is_pi( n ) ) ) - { - _timeframes[n].second = max_depth - ( _ntk.fanout_size( n ) > 1 ? 
1 : 0 ); - compute_levels_ALAP_eager( n ); - } - } ); - - return min_depth; - } - - uint32_t compute_levels_ASAP_eager( node const& n ) - { - if ( _ntk.visited( n ) == _ntk.trav_id() ) - { - return _timeframes[n].first; - } - _ntk.set_visited( n, _ntk.trav_id() ); - - if ( _ntk.is_constant( n ) || _ntk.is_pi( n ) ) + /* update po_level, if possible */ + if ( !_ps.assume.balance_cios ) { - return _timeframes[n].first = 0; - } - - uint32_t level{ 0 }; - _ntk.foreach_fanin( n, [&]( auto const& fi ) { - auto const ni = _ntk.get_node( fi ); - if ( !_ntk.is_constant( ni ) ) + for ( auto const& v : level_assignment ) { - auto fi_level = compute_levels_ASAP_eager( ni ); - if ( _ps.assume.branch_pis || !_ntk.is_pi( ni ) ) + if ( v[1] == _depth + 1 ) { - fi_level += _ntk.fanout_size( ni ) > 1 ? 1 : 0; + _po_levels[v[0]] = std::max( _po_levels[v[0]], _depth + 1 - v[2] - mobility_update ); } - level = std::max( level, fi_level ); - } - } ); - - return _timeframes[n].first = level + 1; - } - - void compute_levels_ALAP_eager( node const& n ) - { - _ntk.set_visited( n, _ntk.trav_id() ); - - _ntk.foreach_fanin( n, [&]( auto const& fi ) { - auto const ni = _ntk.get_node( fi ); - if ( !_ntk.is_constant( ni ) ) - { - if ( _ps.assume.balance_pis && _ntk.is_pi( ni ) ) - { - assert( _timeframes[n].second > 0 ); - _timeframes[ni].second = 0; - } - else if ( _ps.assume.branch_pis || !_ntk.is_pi( ni ) ) + else { - assert( _timeframes[n].second > num_splitter_levels( ni ) ); - auto fi_level = _timeframes[n].second - ( _ntk.fanout_size( ni ) > 1 ? 
2 : 1 ); - if ( _ntk.visited( ni ) != _ntk.trav_id() || _timeframes[ni].second > fi_level ) - { - _timeframes[ni].second = fi_level; - compute_levels_ALAP_eager( ni ); - } + break; } } - } ); + } } -#pragma +#pragma endregion #pragma region Dump buffered network public: @@ -1139,7 +1172,6 @@ class buffer_insertion buffers[_ntk.get_constant( false )].emplace_back( 1, bufntk.get_constant( false ) ); if ( _ntk.get_node( _ntk.get_constant( false ) ) != _ntk.get_node( _ntk.get_constant( true ) ) ) { - std::cerr << "[w] ntk has different nodes for const0 and const1 -- poorly tested case, might be buggy.\n"; node_to_signal[_ntk.get_constant( true )] = bufntk.get_constant( true ); buffers[_ntk.get_constant( true )].emplace_back( 1, bufntk.get_constant( true ) ); } @@ -1147,28 +1179,16 @@ class buffer_insertion /* PIs */ _ntk.foreach_pi( [&]( auto const& n ) { node_to_signal[n] = bufntk.create_pi(); + create_buffer_chain( bufntk, buffers, n, node_to_signal[n] ); } ); - if ( _ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto const& n ) { - create_buffer_chain( bufntk, buffers, n, node_to_signal[n] ); - } ); - } - else - { - _ntk.foreach_pi( [&]( auto const& n ) { - buffers[n].emplace_back( 1, node_to_signal[n] ); - } ); - } /* gates: assume topological order */ _ntk.foreach_gate( [&]( auto const& n ) { std::vector children; _ntk.foreach_fanin( n, [&]( auto const& fi ) { - auto ni = _ntk.get_node( fi ); buf_signal s; - if ( _ntk.is_constant( ni ) || ( !_ps.assume.branch_pis && _ntk.is_pi( ni ) ) ) - s = node_to_signal[ni]; + if ( _ntk.is_constant( _ntk.get_node( fi ) ) ) + s = node_to_signal[fi]; else s = get_buffer_at_relative_depth( bufntk, buffers[fi], _levels[n] - _levels[fi] - 1 ); children.push_back( _ntk.is_complemented( fi ) ? 
!s : s ); @@ -1178,129 +1198,17 @@ class buffer_insertion } ); /* POs */ - if ( _ps.assume.balance_pos ) - { - _ntk.foreach_po( [&]( auto const& f, uint32_t i ) { - auto n = _ntk.get_node( f ); - if ( _ntk.is_constant( n ) || ( !_ps.assume.branch_pis && _ntk.is_pi( n ) ) ) // not branch => not balance - { - if ( _ntk.is_pi( n ) && _ntk.is_complemented( f ) ) - std::cerr << "[w] an explicit inverter between non-branched PI " << n << " and PO " << i << " is neglected.\n"; - bufntk.create_po( _ntk.is_complemented( f ) ? !node_to_signal[f] : node_to_signal[f] ); - } - else if ( _ntk.fanout_size( n ) == 1 && _depth == _levels[f] ) - { - if ( _ntk.is_complemented( f ) ) - bufntk.invert( bufntk.get_node( node_to_signal[f] ) ); - bufntk.create_po( node_to_signal[f] ); - } - else - { - buf_signal s = get_buffer_or_inverter( bufntk, buffers[f], _depth - _levels[f], _ntk.is_complemented( f ) ); - bufntk.create_po( s ); - } - } ); - } - else // !_ps.assume.balance_pos - { - std::set checked; - unordered_node_map, Ntk> inverted_buffers( _ntk ); - _ntk.foreach_po( [&]( auto const& f ) { - auto n = _ntk.get_node( f ); - if ( !_ntk.is_constant( n ) && !( _ntk.is_pi( n ) && !_ps.assume.branch_pis ) && _ntk.fanout_size( n ) > 1 ) - { - if ( checked.find( n ) == checked.end() ) - { - checked.insert( n ); - /* count available slots in buffers[n] */ - uint32_t slots{ 0u }; - for ( auto const& bufs : buffers[n] ) - { - slots += _ps.assume.splitter_capacity - bufntk.fanout_size( bufntk.get_node( bufs.back() ) ); - } - slots -= _ps.assume.splitter_capacity - 1; // buffers[n][0] is n itself - - /* add enough buffers */ - if ( _external_ref_count[n] > _external_ref_count_neg[n] ) /* there are pos POs */ - { - if ( buffers[n].size() == 1 ) - { - buffers[n].emplace_back( 1, bufntk.create_buf( buffers[n][0].back() ) ); - slots += _ps.assume.splitter_capacity - 1; - } - uint32_t needed_slots = _external_ref_count[n] - _external_ref_count_neg[n]; - if ( _external_ref_count_neg[n] > 0 ) - 
++needed_slots; - while ( slots < needed_slots ) - { - auto p = get_lowest_spot( bufntk, buffers[n] ); - add_splitter( bufntk, buffers[n], p.first, p.second ); - slots += _ps.assume.splitter_capacity - 1; - } - } - - /* add inverted buffer tree */ - if ( _external_ref_count_neg[n] > 0 ) - { - auto p = get_lowest_spot( bufntk, buffers[n] ); - buf_signal const& s = p.first; - uint32_t const& rd = p.second; - if ( _external_ref_count_neg[n] == _ntk.fanout_size( n ) ) - { - bufntk.invert( bufntk.get_node( s ) ); - buffers[n][rd].remove( s ); - inverted_buffers[n].push_back( s ); - } - else - { - inverted_buffers[n].push_back( bufntk.create_buf( !s ) ); - } - uint32_t inverted_slots{ _ps.assume.splitter_capacity }; - while ( inverted_slots < _external_ref_count_neg[n] ) - { - buf_signal s = get_first_spot( bufntk, inverted_buffers[n] ); - inverted_buffers[n].push_back( bufntk.create_buf( s ) ); - inverted_slots += _ps.assume.splitter_capacity - 1; - } - } - - /* check */ - uint32_t nbufs = 0; - for ( auto l : buffers[n] ) - nbufs += l.size(); - assert( nbufs - 1 + inverted_buffers[n].size() == _num_buffers[n] ); - } - } - } ); - - _ntk.foreach_po( [&]( auto const& f, uint32_t i ) { - auto n = _ntk.get_node( f ); - if ( _ntk.is_constant( n ) || ( _ntk.is_pi( n ) && ( !_ps.assume.branch_pis || _ntk.fanout_size( n ) == 1 ) ) ) - { - if ( _ntk.is_pi( n ) && _ntk.is_complemented( f ) ) - std::cerr << "[w] an explicit inverter between non-branched PI " << n << " and PO " << i << " is neglected.\n"; - bufntk.create_po( _ntk.is_complemented( f ) ? !node_to_signal[f] : node_to_signal[f] ); - } - else if ( _ntk.fanout_size( n ) == 1 ) - { - if ( _ntk.is_complemented( f ) ) - bufntk.invert( bufntk.get_node( node_to_signal[f] ) ); - bufntk.create_po( node_to_signal[f] ); - } - else - { - buf_signal s = _ntk.is_complemented( f ) ? 
get_first_spot( bufntk, inverted_buffers[n] ) : get_lowest_spot( bufntk, buffers[n] ).first; - assert( bufntk.is_buf( bufntk.get_node( s ) ) ); - bufntk.create_po( s ); - } - } ); - } + _ntk.foreach_po( [&]( auto const& f, auto i ) { + buf_signal s; + if ( _ntk.is_constant( _ntk.get_node( f ) ) ) + s = node_to_signal[f]; + else + s = get_buffer_at_relative_depth( bufntk, buffers[f], _po_levels[i] - _levels[f] - 1 ); + assert( _ps.assume.ignore_co_negation ); + bufntk.create_po( _ntk.is_complemented( f ) ? !s : s ); + } ); - // assert( bufntk.size() - bufntk.num_pis() - bufntk.num_gates() - 1 == num_buffers() ); - if ( bufntk.size() - bufntk.num_pis() - bufntk.num_gates() - 1 != num_buffers() ) - { - std::cerr << "[w] actual #bufs = " << ( bufntk.size() - bufntk.num_pis() - bufntk.num_gates() - 1 ) << ", counted = " << num_buffers() << "\n"; - } + assert( bufntk.size() - bufntk.num_pis() - bufntk.num_gates() - 1 == num_buffers() ); } private: @@ -1310,70 +1218,25 @@ class buffer_insertion if ( _ntk.fanout_size( n ) == 0 ) return; /* dangling */ - auto const& fanout_info = _fanouts[n]; - assert( fanout_info.size() > 0u ); - - if ( _external_ref_count[n] > 0u && !_ps.assume.balance_pos ) - { - if ( _ntk.fanout_size( n ) == _external_ref_count[n] ) - { - if ( _external_ref_count[n] > _external_ref_count_neg[n] ) - buffers[n].resize( std::ceil( std::log( _external_ref_count[n] - _external_ref_count_neg[n] ) / std::log( _ps.assume.splitter_capacity ) ) + 1 ); - else - buffers[n].resize( _external_ref_count_neg[n] > 1 ? 
2 : 1 ); - } - else - { - auto it = fanout_info.rbegin(); - while ( it->fanouts.size() == 0u ) - ++it; - buffers[n].resize( it->relative_depth ); - } - } - else - { - buffers[n].resize( fanout_info.back().relative_depth ); - } + assert( _fanouts[n].size() > 0u ); + buffers[n].resize( _fanouts[n].back().relative_depth ); auto& fot = buffers[n]; - - typename BufNtk::signal fi = s; - fot[0].push_back( fi ); + fot[0].push_back( s ); for ( auto i = 1u; i < fot.size(); ++i ) { - fi = bufntk.create_buf( fi ); - fot[i].push_back( fi ); + fot[i].push_back( bufntk.create_buf( fot[i-1].back() ) ); } } - template - typename BufNtk::signal get_buffer_or_inverter( BufNtk& bufntk, FOT& fot, uint32_t rd, bool inverted ) const - { - assert( rd == fot.size() - 1 ); // must be at the highest level - for ( auto it = fot[rd].begin(); it != fot[rd].end(); ++it ) - { - auto b = bufntk.get_node( *it ); - if ( bufntk.fanout_size( b ) < _ps.assume.splitter_capacity ) - { - if ( bufntk.is_not( b ) != inverted ) - { - if ( bufntk.fanout_size( b ) == 0 ) - bufntk.invert( b ); - else - continue; - } - return *it; - } - } - typename BufNtk::signal b_lower = get_buffer_at_relative_depth( bufntk, fot, rd - 1 ); - typename BufNtk::signal b = bufntk.create_buf( inverted ? 
!b_lower : b_lower ); - fot[rd].push_back( b ); - return b; - } - template typename BufNtk::signal get_buffer_at_relative_depth( BufNtk& bufntk, FOT& fot, uint32_t rd ) const { typename BufNtk::signal b = fot[rd].back(); + if ( rd == 0 && bufntk.is_pi( bufntk.get_node( b ) ) ) + { + assert( bufntk.fanout_size( bufntk.get_node( b ) ) < _ps.assume.ci_capacity ); + return b; + } if ( bufntk.fanout_size( bufntk.get_node( b ) ) == _ps.assume.splitter_capacity ) { assert( rd > 0 ); @@ -1383,68 +1246,92 @@ class buffer_insertion } return b; } +#pragma endregion - template - std::pair get_lowest_spot( BufNtk& bufntk, FOT& fot ) const +#pragma region Post-dump optimization +public: +template +uint32_t remove_buffer_chains( BufNtk& ntk ) const +{ + static_assert( is_buffered_network_type_v, "BufNtk is not a buffered network" ); + + uint32_t max_chain = 0; + ntk.incr_trav_id(); + ntk.foreach_po( [&]( auto f ){ + remove_buffer_chains_rec( ntk, ntk.get_node( f ), 0, max_chain ); + } ); + return max_chain; +} + +private: +template +std::pair remove_buffer_chains_rec( BufNtk& ntk, typename BufNtk::node n, typename BufNtk::node parent, uint32_t& max_chain ) const +{ + if ( ntk.visited( n ) == ntk.trav_id() ) + return std::make_pair( 0, n ); + ntk.set_visited( n, ntk.trav_id() ); + if ( ntk.is_pi( n ) ) + return std::make_pair( 0, n ); + + if ( ntk.is_buf( n ) ) { - for ( auto rd = 1u; rd < fot.size(); ++rd ) + // splitter + if ( ntk.fanout_size( n ) > 1 ) { - for ( auto it = fot[rd].begin(); it != fot[rd].end(); ++it ) + ntk.foreach_fanin( n, [&]( auto f ){ + remove_buffer_chains_rec( ntk, ntk.get_node( f ), n, max_chain ); + } ); + return std::make_pair( 0, n ); + } + + // single-output buffer: can be part of a chain to be removed + std::pair ret; + ntk.foreach_fanin( n, [&]( auto f ){ + auto [count, origin] = remove_buffer_chains_rec( ntk, ntk.get_node( f ), n, max_chain ); + if ( count % _ps.assume.num_phases == _ps.assume.num_phases - 1 ) { - typename BufNtk::signal& b = *it; 
- if ( bufntk.fanout_size( bufntk.get_node( b ) ) < _ps.assume.splitter_capacity ) + // TODO: take care of complementation + if ( parent != 0 ) + { + ntk.replace_in_node( parent, n, ntk.make_signal( origin ) ); + ntk.take_out_node( n ); + } + else { - return { b, rd }; + ntk.replace_in_outputs( n, ntk.make_signal( origin ) ); + ntk.take_out_node( n ); } + max_chain = std::max( count + 1, max_chain ); } - } - assert( false ); - } - - template - BufSig get_first_spot( BufNtk const& bufntk, std::list const& bufs ) const - { - auto it = bufs.begin(); - while ( it != bufs.end() ) - { - if ( bufntk.fanout_size( bufntk.get_node( *it ) ) < _ps.assume.splitter_capacity ) - return *it; - ++it; - } - assert( false ); + ret = std::make_pair( count + 1, origin ); + } ); + return ret; } - template - void add_splitter( BufNtk& bufntk, FOT& fot, typename BufNtk::signal b, uint32_t rd ) const - { - if ( rd == fot.size() - 1 ) - fot.emplace_back( 1, bufntk.create_buf( b ) ); - else - fot[rd + 1].push_back( bufntk.create_buf( b ) ); - } + // gate + ntk.foreach_fanin( n, [&]( auto f ){ + remove_buffer_chains_rec( ntk, ntk.get_node( f ), n, max_chain ); + } ); + return std::make_pair( 0, n ); +} #pragma endregion public: - /*! \brief Optimize with chunked movement using the specified optimization policy. - * - * For more information, please refer to [1]. - * - * [1] Irredundant Buffer and Splitter Insertion and Scheduling-Based Optimization for AQFP Circuits. - * Siang-Yun Lee et. al. IWLS 2021. */ + /*! \brief Optimize with chunked movement using the specified optimization policy. 
*/ void optimize() { if ( _ps.optimization_effort == buffer_insertion_params::none ) { return; } - else if ( _ps.optimization_effort == buffer_insertion_params::optimal ) - { - if constexpr ( has_get_network_name_v ) - optimize_with_smt( _ntk.get_network_name() ); - else - optimize_with_smt( "" ); - return; - } + //else if ( _ps.optimization_effort == buffer_insertion_params::optimal ) + //{ + // if constexpr ( has_get_network_name_v ) + // optimize_with_smt( _ntk.get_network_name() ); + // else + // optimize_with_smt( "" ); + // return; + //} if ( _outdated ) { @@ -1456,8 +1343,7 @@ class buffer_insertion { updated = find_and_move_chunks(); } while ( updated && _ps.optimization_effort == buffer_insertion_params::until_sat ); - - adjust_depth(); + single_gate_movement(); } #pragma region Chunked movement @@ -1468,26 +1354,31 @@ class buffer_insertion node o; // outside node }; + struct po_interface + { + node c; // chunk node + uint32_t o; // PO index + }; + struct chunk { uint32_t id; std::vector members{}; std::vector input_interfaces{}; std::vector output_interfaces{}; + std::vector po_interfaces{}; int32_t slack{ std::numeric_limits::max() }; int32_t benefits{ 0 }; }; bool is_ignored( node const& n ) const { - return _ntk.is_constant( n ) || ( !_ps.assume.branch_pis && _ntk.is_pi( n ) ); + return _ntk.is_constant( n ); } bool is_fixed( node const& n ) const { - if ( _ps.assume.balance_pis ) - return _ntk.is_pi( n ); - return false; + return _ps.assume.balance_cios && _ps.assume.ci_phases.size() == 1 && _ntk.is_pi( n ); } bool find_and_move_chunks() @@ -1520,16 +1411,42 @@ class buffer_insertion } ); count_buffers(); - // assert( num_buffers() <= num_buffers_before ); + assert( num_buffers() <= num_buffers_before ); return updated && num_buffers() < num_buffers_before; } + void single_gate_movement() + { + _ntk.foreach_node( [&]( auto const& n ) { + if ( is_ignored( n ) || is_fixed( n ) ) + return; + + _ntk.incr_trav_id(); + chunk c{ _ntk.trav_id() }; + 
c.members.emplace_back( n ); + _ntk.foreach_fanin( n, [&]( auto const& fi ) { + auto const ni = _ntk.get_node( fi ); + if ( !is_ignored( ni ) ) + c.input_interfaces.push_back( { n, ni } ); + } ); + auto const& fanout_info = _fanouts[n]; + for ( auto it = fanout_info.begin(); it != fanout_info.end(); ++it ) + { + for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) + c.output_interfaces.push_back( { n, *it2 } ); + for ( auto it2 = it->extrefs.begin(); it2 != it->extrefs.end(); ++it2 ) + c.po_interfaces.push_back( { n, *it2 } ); + } + + if ( !analyze_chunk_down( c ) ) + analyze_chunk_up( c ); + } ); + } + void recruit( node const& n, chunk& c ) { if ( _ntk.visited( n ) == c.id ) return; - // if ( c.members.size() > _ps.max_chunk_size ) // TODO: Directly returning might be problematic - // return; assert( _ntk.visited( n ) <= _start_id ); assert( !is_fixed( n ) ); @@ -1560,43 +1477,92 @@ class buffer_insertion void recruit_fanouts( node const& n, chunk& c ) { auto const& fanout_info = _fanouts[n]; - if ( fanout_info.size() == 0 ) + if ( fanout_info.size() == 0 ) /* dangling */ return; - if ( _ntk.fanout_size( n ) == _external_ref_count[n] ) // only POs + auto it = fanout_info.begin(); + if ( _ntk.fanout_size( n ) == 1 ) /* single fanout */ { - c.output_interfaces.push_back( { n, n } ); // PO interface + assert( fanout_info.size() == 1 ); + if ( it->fanouts.size() == 1 ) /* single gate fanout */ + { + if ( it->relative_depth == 1 ) + recruit( it->fanouts.front(), c ); + else + c.output_interfaces.push_back( { n, it->fanouts.front() } ); + } + else /* single PO fanout */ + { + assert( it->extrefs.size() == 1 ); + c.po_interfaces.push_back( { n, it->extrefs.front() } ); + } + return; } - else if ( fanout_info.size() == 1 ) // single gate fanout + + for ( ; it != fanout_info.end(); ++it ) { - auto const& no = fanout_info.front().fanouts.front(); - if ( is_fixed( no ) ) - c.output_interfaces.push_back( { n, no } ); - else if ( 
fanout_info.front().relative_depth == 1 ) - recruit( no, c ); - else - c.output_interfaces.push_back( { n, no } ); + for ( auto it2 = it->extrefs.begin(); it2 != it->extrefs.end(); ++it2 ) + c.po_interfaces.push_back( { n, *it2 } ); } - else + it = fanout_info.begin(); + + if ( _ps.assume.ci_capacity > 1 && _ntk.is_pi( n ) ) { - for ( auto it = fanout_info.begin(); it != fanout_info.end(); ++it ) + if ( it->relative_depth == 1 ) + { + for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) + recruit( *it2, c ); + it++; + } + if ( it->relative_depth == 2 && fanout_info.front().num_edges == _ps.assume.ci_capacity ) + { + assert( fanout_info.front().relative_depth == 1 ); + for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) + recruit( *it2, c ); + it++; + } + for ( ; it != fanout_info.end(); ++it ) { for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) { - if ( is_fixed( *it2 ) ) - c.output_interfaces.push_back( { n, *it2 } ); - else if ( it->relative_depth == 2 ) - recruit( *it2, c ); - else if ( _ntk.visited( *it2 ) != c.id ) + if ( _ntk.visited( *it2 ) != c.id ) c.output_interfaces.push_back( { n, *it2 } ); } } + return; + } + + for ( ; it != fanout_info.end(); ++it ) + { + for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) + { + if ( it->relative_depth == 2 ) + recruit( *it2, c ); + else if ( _ntk.visited( *it2 ) != c.id ) + c.output_interfaces.push_back( { n, *it2 } ); + } } } bool are_close( node const& ni, node const& n ) { auto const& fanout_info = _fanouts[ni]; + + if ( _ps.assume.ci_capacity > 1 && _ntk.is_pi( ni ) ) + { + auto const& front_fanouts = fanout_info.front().fanouts; + if ( fanout_info.front().relative_depth == 1 ) + { + if ( std::find( front_fanouts.begin(), front_fanouts.end(), n ) != front_fanouts.end() ) + return true; + if ( fanout_info.front().num_edges < _ps.assume.ci_capacity ) + return false; + } + else if ( _ntk.fanout_size( ni ) <= _ps.assume.ci_capacity ) + 
return false; + assert( fanout_info.size() > 1 ); + } + if ( fanout_info.size() == 1 && fanout_info.front().relative_depth == 1 ) { assert( fanout_info.front().fanouts.front() == n ); @@ -1629,7 +1595,7 @@ class buffer_insertion } for ( int i = 0; i < c.output_interfaces.size(); ++i ) { - if ( _ntk.visited( c.output_interfaces[i].o ) == c.id && c.output_interfaces[i].o != c.output_interfaces[i].c ) + if ( _ntk.visited( c.output_interfaces[i].o ) == c.id ) { c.output_interfaces.erase( c.output_interfaces.begin() + i ); --i; @@ -1639,6 +1605,9 @@ class buffer_insertion bool analyze_chunk_down( chunk c ) { + count_buffers(); + auto buffers_before = num_buffers(); + std::set marked_oi; for ( auto oi : c.output_interfaces ) { @@ -1659,25 +1628,69 @@ class buffer_insertion break; } c.slack = std::min( c.slack, int32_t( rd - lowest ) ); - if ( c.slack == rd - lowest ) - mark_occupied( ii.o, lowest ); // TODO: may be inaccurate - if ( _fanouts[ii.o].back().relative_depth == rd && _fanouts[ii.o].back().num_edges == 1 ) // is the only highest fanout + pseudo_move( ii.o, ii.c, rd, lowest ); + if ( _fanouts[ii.o].back().relative_depth == rd && _fanouts[ii.o].back().num_edges == 0 ) // `ii.c` is the last highest fanout of `ii.o` { ++c.benefits; } } + if ( c.po_interfaces.size() > 0 ) + { + if ( !_ps.assume.balance_cios && c.slack >= _ps.assume.num_phases ) + { + c.slack -= c.slack % _ps.assume.num_phases; + } + else + { + for ( auto poi : c.po_interfaces ) + { + if ( marked_oi.find( poi.c ) == marked_oi.end() ) + --c.benefits; + } + } + } + + std::vector pi_members; for ( auto m : c.members ) - c.slack = std::min( c.slack, int32_t( _ntk.is_pi( m ) ? 
_levels[m] : _levels[m] - 1 ) ); + { + if ( _ntk.is_pi( m ) ) + { + pi_members.emplace_back( m ); + c.slack = std::min( c.slack, int32_t( _levels[m] ) ); + } + } + if ( pi_members.size() > 0 ) + { + while ( c.slack > 0 ) + { + bool ok = true; + for ( auto m : pi_members ) + { + if ( _levels[m] < c.slack || !is_acceptable_ci_lvl( _levels[m] - c.slack ) ) + { + ok = false; + break; + } + } + if ( !ok ) + --c.slack; + else + break; + } + } if ( c.benefits > 0 && c.slack > 0 ) { - count_buffers(); bool legal = true; - auto buffers_before = num_buffers(); for ( auto m : c.members ) _levels[m] -= c.slack; + if ( !_ps.assume.balance_cios && c.slack >= _ps.assume.num_phases ) + { + for ( auto poi : c.po_interfaces ) + _po_levels[poi.o] -= c.slack; + } for ( auto m : c.members ) update_fanout_info( m ); for ( auto ii : c.input_interfaces ) @@ -1691,6 +1704,11 @@ class buffer_insertion /* UNDO */ for ( auto m : c.members ) _levels[m] += c.slack; + if ( !_ps.assume.balance_cios && c.slack >= _ps.assume.num_phases ) + { + for ( auto poi : c.po_interfaces ) + _po_levels[poi.o] += c.slack; + } for ( auto m : c.members ) update_fanout_info( m ); for ( auto ii : c.input_interfaces ) @@ -1704,9 +1722,10 @@ class buffer_insertion } else { - /* reset fanout_infos of input_interfaces because num_edges may be modified by mark_occupied */ + /* reset fanout_infos of input_interfaces because num_edges may be modified by pseudo_move */ for ( auto ii : c.input_interfaces ) update_fanout_info( ii.o ); + _outdated = true; return false; } } @@ -1716,33 +1735,80 @@ class buffer_insertion { auto const& fanout_info = _fanouts[n]; assert( fanout_info.size() ); - assert( _ntk.fanout_size( n ) != _external_ref_count[n] ); - if ( fanout_info.size() == 1 ) + + auto it = fanout_info.begin(); + uint32_t rd_prev = 1; + uint32_t num_splitters_prev = 1; + if ( _ntk.is_pi( n ) && _ps.assume.ci_capacity > 1 ) + { + if ( it->num_edges <= _ps.assume.ci_capacity ) + return 1; + else + num_splitters_prev = 
_ps.assume.ci_capacity - it->fanouts.size() - it->extrefs.size(); + } + else if ( fanout_info.size() == 1 ) // single fanout { - assert( fanout_info.front().fanouts.size() == 1 ); return 1; } - auto it = fanout_info.begin(); - ++it; - while ( it != fanout_info.end() && it->num_edges == _ps.assume.splitter_capacity ) - ++it; - if ( it == fanout_info.end() ) // full fanout tree - return fanout_info.back().relative_depth + 1; - --it; // the last full layer - return it->relative_depth + 1; + + ++it; // skip the first splitter at rd=1 + for ( ; it != fanout_info.end(); ++it ) + { + if ( it->relative_depth > rd_prev + 1 ) // level skip => must not full + { + return rd_prev + 1; + } + else if ( it->num_edges == _ps.assume.splitter_capacity * num_splitters_prev ) // full layer + { + num_splitters_prev = it->num_edges - it->fanouts.size() - it->extrefs.size(); + rd_prev = it->relative_depth; + } + else + { + return it->relative_depth; + } + } + // all full + return fanout_info.back().relative_depth + 1; } - void mark_occupied( node const& n, uint32_t rd ) + /* move `no`, which is a fanout of `n`, from `from_rd` to `to_rd` */ + void pseudo_move( node const& n, node const& no, uint32_t from_rd, uint32_t to_rd ) { + assert( from_rd > to_rd ); auto& fanout_info = _fanouts[n]; - for ( auto it = fanout_info.begin(); it != fanout_info.end(); ++it ) + auto it = fanout_info.begin(); + for ( ; it != fanout_info.end(); ++it ) { - if ( it->relative_depth == rd ) + if ( it->relative_depth == to_rd ) { ++it->num_edges; - return; + it->fanouts.push_back( no ); + break; + } + else if ( it->relative_depth > to_rd ) + { + fanout_info.insert( it, {to_rd, {no}, {}, 2} ); + break; + } + } + for ( ; it != fanout_info.end(); ++it ) + { + if ( it->relative_depth == from_rd ) + { + --it->num_edges; + for ( auto it2 = it->fanouts.begin(); it2 != it->fanouts.end(); ++it2 ) + { + if ( *it2 == no ) + { + it->fanouts.erase( it2 ); + return; + } + } + assert( false ); } } + assert( false ); } bool 
analyze_chunk_up( chunk c ) @@ -1762,14 +1828,66 @@ class buffer_insertion ++c.benefits; } auto const& fanout_info = _fanouts[oi.c]; - if ( _ntk.fanout_size( oi.c ) == _external_ref_count[oi.c] ) // only POs - c.slack = std::min( c.slack, int32_t( _depth - _levels[oi.c] - num_splitter_levels( oi.c ) ) ); - else if ( fanout_info.size() == 1 ) // single fanout + if ( fanout_info.size() == 1 ) /* single fanout */ c.slack = std::min( c.slack, int32_t( fanout_info.front().relative_depth - 1 ) ); else c.slack = std::min( c.slack, int32_t( _levels[oi.o] - _levels[oi.c] - 2 ) ); } + std::vector po_to_move; + if ( c.po_interfaces.size() > 0 ) + { + for ( auto poi : c.po_interfaces ) + { + if ( _levels[poi.c] + num_splitter_levels( poi.c ) + c.slack >= _po_levels[poi.o] ) + { + if ( _ps.assume.balance_cios ) + c.slack = std::min( c.slack, int32_t( _po_levels[poi.o] - _levels[poi.c] - num_splitter_levels( poi.c ) - 1 ) ); + else + { + c.slack = std::min( c.slack, int32_t( _depth + 1 - _po_levels[poi.o] ) ); + po_to_move.emplace_back( poi.o ); + } + } + else + { + if ( marked_oi.find( poi.c ) == marked_oi.end() ) + ++c.benefits; + } + } + } + + if ( c.benefits <= 0 || c.slack <= 0 ) + return false; + + std::vector pi_members; + for ( auto m : c.members ) + { + if ( _ntk.is_pi( m ) ) + pi_members.emplace_back( m ); + } + if ( pi_members.size() > 0 ) + { + while ( c.slack > 0 ) + { + bool ok = true; + for ( auto m : pi_members ) + { + if ( !is_acceptable_ci_lvl( _levels[m] + c.slack ) ) + { + ok = false; + break; + } + } + if ( !ok ) + --c.slack; + else + break; + } + } + if ( po_to_move.size() > 0 ) + c.slack -= c.slack % _ps.assume.num_phases; + if ( c.benefits > 0 && c.slack > 0 ) { count_buffers(); @@ -1778,17 +1896,12 @@ class buffer_insertion for ( auto m : c.members ) _levels[m] += c.slack; + for ( auto po : po_to_move ) + _po_levels[po] += c.slack; for ( auto m : c.members ) - { legal &= update_fanout_info( m ); - if ( !legal ) - break; - } - if ( legal ) - { - for ( 
auto ii : c.input_interfaces ) - update_fanout_info( ii.o ); - } + for ( auto ii : c.input_interfaces ) + legal &= update_fanout_info( ii.o ); _outdated = true; if ( legal ) @@ -1798,6 +1911,8 @@ class buffer_insertion /* UNDO */ for ( auto m : c.members ) _levels[m] -= c.slack; + for ( auto po : po_to_move ) + _po_levels[po] -= c.slack; for ( auto m : c.members ) update_fanout_info( m ); for ( auto ii : c.input_interfaces ) @@ -1814,53 +1929,11 @@ class buffer_insertion return false; } } - - void adjust_depth() - { - if ( !_ps.assume.balance_pis ) - { - auto min_level = std::numeric_limits::max(); - if ( _ps.assume.branch_pis ) - { - _ntk.foreach_pi( [&]( auto n ) { - min_level = std::min( min_level, _levels[n] ); - } ); - - if ( min_level != 0 ) - { - _ntk.foreach_node( [&]( auto n ) { - if ( !_ntk.is_constant( n ) ) - _levels[n] -= min_level; - } ); - } - } - else - { - _ntk.foreach_gate( [&]( auto n ) { - min_level = std::min( min_level, _levels[n] ); - } ); - - if ( min_level > 1 ) - { - _ntk.foreach_gate( [&]( auto n ) { - _levels[n] -= min_level - 1; - } ); - } - } - } - - _depth = 0; - _ntk.foreach_po( [&]( auto f ) { - _depth = std::max( _depth, _levels[_ntk.get_node( f )] + num_splitter_levels( _ntk.get_node( f ) ) ); - } ); - - _outdated = true; - } #pragma endregion #pragma region Global optimal by SMT private: -#include "optimal_buffer_insertion.hpp" +//#include "optimal_buffer_insertion.hpp" #pragma endregion private: @@ -1868,6 +1941,7 @@ class buffer_insertion { uint32_t relative_depth{ 0u }; std::list fanouts; + std::list extrefs; // IDs of POs (as in `_ntk.foreach_po`) uint32_t num_edges{ 0u }; }; using fanouts_by_level = std::list; @@ -1877,25 +1951,25 @@ class buffer_insertion bool _outdated{ true }; bool _is_scheduled_ASAP{ true }; + /* The following data structures uniquely define the state (i.e. schedule) of the algorithm/flow. + The rest (`_fanouts` and `_num_buffers`) are computed from these by calling `count_buffers()`. 
*/ node_map _levels; - node_map, Ntk> _timeframes; + std::vector _po_levels; // imaginary node, must be at `num_phases * k + 1` uint32_t _depth{ 0u }; /* Guarantees on `_fanouts` (when not `_outdated`): - * - If not `branch_pis`: `_fanouts[PI]` is empty. - * - PO ref count is added to `num_edges` of the last element. + * - Sum of `_fanouts[n][l].fanouts.length() + _fanouts[n][l].extrefs.length()` over all `l`s + * should be equal to `ntk.fanout_size( n )`. * - If having only one fanout: `_fanouts[n].size() == 1`. * - If having multiple fanouts: `_fanouts[n]` must have at least two elements, * and the first element must have `relative_depth == 1` and `num_edges == 1`. + * - If `ci_capacity > 1`, `_fanouts[PI].size()` may be 1. */ node_map _fanouts; - node_map _external_ref_count; // total refs - node_map _external_ref_count_neg; // negated refs node_map _num_buffers; - node_map _min_level; - node_map _max_level; + node_map, Ntk> _timeframes; // only for SMT; the most extreme min/max uint32_t _start_id; // for chunked movement -}; /* buffer_insertion */ +}; /* buffer_insertion */ -} // namespace mockturtle +} // namespace mockturtle \ No newline at end of file diff --git a/include/mockturtle/algorithms/aqfp/buffer_verification.hpp b/include/mockturtle/algorithms/aqfp/buffer_verification.hpp index 3e9bcb1cd..ca899c88c 100644 --- a/include/mockturtle/algorithms/aqfp/buffer_verification.hpp +++ b/include/mockturtle/algorithms/aqfp/buffer_verification.hpp @@ -61,6 +61,7 @@ uint32_t recompute_level( Ntk& ntk, typename Ntk::node const& n ) { if ( ntk.visited( n ) == ntk.trav_id() ) return ntk.level( n ); + ntk.set_visited( n, ntk.trav_id() ); uint32_t max_fi_level{ 0u }; ntk.foreach_fanin( n, [&]( auto const& fi ) { @@ -72,76 +73,31 @@ uint32_t recompute_level( Ntk& ntk, typename Ntk::node const& n ) } // namespace detail -/*! \brief Find a reasonable level assignment for a buffered network. +/*! 
\brief Find a reasonable level assignment for a buffered network given PI levels. * * \param ntk Buffered network - * \param ps AQFP constraints + * \param pi_levels Levels of PIs * \return Level assignment to all nodes */ template -node_map schedule_buffered_network( Ntk const& ntk, aqfp_assumptions const& ps ) +node_map schedule_buffered_network_with_PI_levels( Ntk const& ntk, std::vector const& pi_levels ) { + assert( pi_levels.size() == ntk.num_pis() ); + using node = typename Ntk::node; node_map levels( ntk ); depth_view dv{ ntk }; - /* PIs are balanced : simple ASAP - POs are balanced : ALAP == ASAP and then lift all POs' TFI cone - neither : start from higher PO's TFI cone */ - if ( !ps.balance_pis ) - { - ntk.incr_trav_id(); - ntk.set_visited( ntk.get_node( ntk.get_constant( false ) ), ntk.trav_id() ); - ntk.foreach_pi( [&]( auto const& n ) { - ntk.set_visited( n, ntk.trav_id() ); - } ); - - if ( ps.balance_pos ) - { - ntk.foreach_po( [&]( auto const& f ) { - detail::schedule_fanin_cone( dv, ntk.get_node( f ), dv.depth() ); - } ); - } - else - { - std::list pos; - ntk.foreach_po( [&]( auto const& f ) { - pos.push_back( ntk.get_node( f ) ); - } ); + ntk.incr_trav_id(); + ntk.set_visited( ntk.get_node( ntk.get_constant( false ) ), ntk.trav_id() ); + ntk.foreach_pi( [&]( auto const& n, auto i ) { + ntk.set_visited( n, ntk.trav_id() ); + dv.set_level( n, pi_levels[i] ); + } ); - while ( pos.size() > 0 ) - { - /* choose the highest unscheduled PO */ - node n = pos.front(); - uint32_t max_level = dv.level( n ); - for ( auto it = pos.begin(); it != pos.end(); ++it ) - { - if ( dv.level( *it ) > max_level ) - { - n = *it; - max_level = dv.level( n ); - } - } - - detail::schedule_fanin_cone( dv, n, max_level ); - - for ( auto it = pos.begin(); it != pos.end(); ) - { - /* remove all visited POs (there may be lower POs in the TFI of the processed PO) */ - if ( ntk.visited( *it ) == ntk.trav_id() ) - { - it = pos.erase( it ); - } - /* recompute levels because some of 
their TFI may have been lifted */ - else - { - detail::recompute_level( dv, *it ); - ++it; - } - } - } - } - } + ntk.foreach_po( [&]( auto const& f ){ + detail::recompute_level( dv, ntk.get_node( f ) ); + }); ntk.foreach_node( [&]( auto const& n ) { levels[n] = dv.level( n ); @@ -153,12 +109,12 @@ node_map schedule_buffered_network( Ntk const& ntk, aqfp_assumpti /*! \brief Verify a buffered network according to AQFP assumptions with provided level assignment. * * \param ntk Buffered network - * \param ps AQFP constraints + * \param ps AQFP assumptions * \param levels Level assignment for all nodes * \return Whether `ntk` is path-balanced and properly-branched */ template -bool verify_aqfp_buffer( Ntk const& ntk, aqfp_assumptions const& ps, node_map const& levels ) +bool verify_aqfp_buffer( Ntk const& ntk, aqfp_assumptions_legacy const& ps, node_map const& levels ) { static_assert( is_buffered_network_type_v, "Ntk is not a buffered network" ); static_assert( has_is_buf_v, "Ntk does not implement the is_buf method" ); @@ -214,16 +170,122 @@ bool verify_aqfp_buffer( Ntk const& ntk, aqfp_assumptions const& ps, node_map -bool verify_aqfp_buffer( Ntk const& ntk, aqfp_assumptions const& ps ) +bool verify_aqfp_buffer( Ntk const& ntk, aqfp_assumptions_realistic const& ps, node_map const& levels ) +{ + static_assert( is_buffered_network_type_v, "Ntk is not a buffered network" ); + static_assert( has_is_buf_v, "Ntk does not implement the is_buf method" ); + bool legal = true; + + /* fanout branching */ + ntk.foreach_node( [&]( auto const& n ) { + if ( ntk.is_constant( n ) ) + return; + if ( ntk.is_pi( n ) ) + { + legal &= ( ntk.fanout_size( n ) <= ps.ci_capacity ); + } + else if ( ntk.is_buf( n ) ) + { + legal &= ( ntk.fanout_size( n ) <= ps.splitter_capacity ); + } + else /* logic gate */ + { + legal &= ( ntk.fanout_size( n ) <= 1 ); + } + assert( legal ); + } ); + + /* path balancing */ + ntk.foreach_node( [&]( auto const& n ) { + ntk.foreach_fanin( n, [&]( auto const& fi ) 
{ + auto ni = ntk.get_node( fi ); + if ( !ntk.is_constant( ni ) ) + legal &= ( levels[ni] == levels[n] - 1 ); + assert( legal ); + } ); + } ); + + if ( ps.balance_cios ) + { + auto const check_pi_fn = [&]( uint32_t level ){ + for ( auto const& p : ps.ci_phases ) + { + if ( level == p ) + return true; + } + return false; + }; + + ntk.foreach_pi( [&]( auto const& n ) { + legal &= check_pi_fn( levels[n] ); + assert( legal ); + } ); + + uint32_t depth{ 0u }; + ntk.foreach_po( [&]( auto const& f ) { + auto n = ntk.get_node( f ); + if ( !ntk.is_constant( n ) ) + { + if ( depth == 0u ) + depth = levels[n]; + else + legal &= ( levels[n] == depth ); + assert( legal ); + } + } ); + legal &= ( depth % ps.num_phases == 0 ); + assert( legal ); + } + else + { + auto const check_pi_fn = [&]( uint32_t level ){ + for ( auto const& p : ps.ci_phases ) + { + if ( level >= p && ( level - p ) % ps.num_phases == 0 ) + return true; + } + return false; + }; + + ntk.foreach_pi( [&]( auto const& n ) { + legal &= check_pi_fn( levels[n] ); + assert( legal ); + } ); + + ntk.foreach_po( [&]( auto const& f ) { + auto n = ntk.get_node( f ); + if ( !ntk.is_constant( n ) ) + { + legal &= ( levels[n] % ps.num_phases == 0 ); + assert( legal ); + } + } ); + } + + // TODO: max_phase_skip + + return legal; +} + +/*! \brief Verify a buffered network according to AQFP assumptions with provided PI level assignment. 
+ * + * \param ntk Buffered network + * \param ps AQFP assumptions + * \param pi_levels Levels of PIs + * \return Whether `ntk` is path-balanced, phase-aligned, and properly-branched + */ +template +bool verify_aqfp_buffer( Ntk const& ntk, Asmp const& ps, std::vector const& pi_levels ) { - auto const levels = schedule_buffered_network( ntk, ps ); + auto const levels = schedule_buffered_network_with_PI_levels( ntk, pi_levels ); return verify_aqfp_buffer( ntk, ps, levels ); } diff --git a/include/mockturtle/algorithms/aqfp/optimal_buffer_insertion.hpp b/include/mockturtle/algorithms/aqfp/optimal_buffer_insertion.hpp index b2cefa969..859f681fb 100644 --- a/include/mockturtle/algorithms/aqfp/optimal_buffer_insertion.hpp +++ b/include/mockturtle/algorithms/aqfp/optimal_buffer_insertion.hpp @@ -33,6 +33,91 @@ // NOTE: This file is included inside the class `mockturtle::buffer_insertion` // It should not be included anywhere else. +#pragma region Compute timeframe for SMT solving + /*! \brief Compute the earliest and latest possible timeframe by eager ASAP and ALAP */ + uint32_t compute_timeframe( uint32_t max_depth ) + { + // TODO: Consider max_depth % _ps.assume.num_phases == 0 constraint + _timeframes.reset( std::make_pair( 0, 0 ) ); + uint32_t min_depth{ 0 }; + + _ntk.incr_trav_id(); + _ntk.foreach_po( [&]( auto const& f ) { + auto const no = _ntk.get_node( f ); + auto clevel = compute_levels_ASAP_eager( no ) + ( _ntk.fanout_size( no ) > 1 ? 1 : 0 ); + min_depth = std::max( min_depth, clevel ); + } ); + + _ntk.incr_trav_id(); + _ntk.foreach_po( [&]( auto const& f ) { + const auto n = _ntk.get_node( f ); + if ( !_ntk.is_constant( n ) && _ntk.visited( n ) != _ntk.trav_id() ) + { + _timeframes[n].second = max_depth - ( _ntk.fanout_size( n ) > 1 ? 
1 : 0 ); + compute_levels_ALAP_eager( n ); + } + } ); + + return min_depth; + } + + uint32_t compute_levels_ASAP_eager( node const& n ) + { + if ( _ntk.visited( n ) == _ntk.trav_id() ) + { + return _timeframes[n].first; + } + _ntk.set_visited( n, _ntk.trav_id() ); + + if ( _ntk.is_constant( n ) ) + { + return _timeframes[n].first = 0; + } + if ( _ntk.is_pi( n ) ) + { + return _timeframes[n].first = _ps.assume.ci_phases[0]; + } + + uint32_t level{ 0 }; + _ntk.foreach_fanin( n, [&]( auto const& fi ) { + auto const ni = _ntk.get_node( fi ); + if ( !_ntk.is_constant( ni ) ) + { + level = std::max( level, compute_levels_ASAP_eager( ni ) + ( _ntk.fanout_size( ni ) > 1 ? 1 : 0 ) ); + } + } ); + + return _timeframes[n].first = level + 1; + } + + void compute_levels_ALAP_eager( node const& n ) + { + _ntk.set_visited( n, _ntk.trav_id() ); + + _ntk.foreach_fanin( n, [&]( auto const& fi ) { + auto const ni = _ntk.get_node( fi ); + if ( !_ntk.is_constant( ni ) ) + { + if ( _ps.assume.balance_cios && _ntk.is_pi( ni ) ) + { + assert( _timeframes[n].second > _ps.assume.ci_phases[0] ); + _timeframes[ni].second = _ps.assume.ci_phases[0]; + } + else + { + assert( _timeframes[n].second > num_splitter_levels( ni ) ); + auto fi_level = _timeframes[n].second - ( _ntk.fanout_size( ni ) > 1 ? 
2 : 1 ); + if ( _ntk.visited( ni ) != _ntk.trav_id() || _timeframes[ni].second > fi_level ) + { + _timeframes[ni].second = fi_level; + compute_levels_ALAP_eager( ni ); + } + } + } + } ); + } +#pragma + #if __GNUC__ == 7 void optimize_with_smt( std::string name = "" ) diff --git a/include/mockturtle/io/write_dot.hpp b/include/mockturtle/io/write_dot.hpp index 284db76a1..edbed2914 100644 --- a/include/mockturtle/io/write_dot.hpp +++ b/include/mockturtle/io/write_dot.hpp @@ -75,6 +75,13 @@ class default_dot_drawer } else { + if constexpr ( has_is_buf_v ) + { + if ( ntk.is_buf( n ) ) + { + return "box"; + } + } return "ellipse"; } } @@ -98,6 +105,16 @@ class default_dot_drawer virtual std::string node_fillcolor( Ntk const& ntk, node const& n ) const { + if constexpr ( has_is_buf_v ) + { + if ( ntk.is_buf( n ) ) + { + if ( ntk.fanout_size( n ) > 1 ) + return "lightcoral"; + else + return "lightskyblue"; + } + } return ( ntk.is_constant( n ) || ntk.is_ci( n ) ) ? "snow2" : "white"; } @@ -113,6 +130,11 @@ class default_dot_drawer (void)ntk; (void)n; (void)f; + if constexpr ( is_buffered_network_type_v ) + { + if ( ntk.is_constant( ntk.get_node( f ) ) ) + return false; + } return true; } diff --git a/include/mockturtle/mockturtle.hpp b/include/mockturtle/mockturtle.hpp index 3142be58b..030bfdc40 100644 --- a/include/mockturtle/mockturtle.hpp +++ b/include/mockturtle/mockturtle.hpp @@ -173,6 +173,7 @@ #include "mockturtle/networks/tig.hpp" #include "mockturtle/networks/xag.hpp" #include "mockturtle/networks/xmg.hpp" +#include "mockturtle/networks/crossed.hpp" #include "mockturtle/properties/aqfpcost.hpp" #include "mockturtle/properties/mccost.hpp" #include "mockturtle/properties/migcost.hpp" @@ -214,3 +215,4 @@ #include "mockturtle/views/names_view.hpp" #include "mockturtle/views/topo_view.hpp" #include "mockturtle/views/window_view.hpp" +#include "mockturtle/views/rank_view.hpp" diff --git a/include/mockturtle/networks/buffered.hpp 
b/include/mockturtle/networks/buffered.hpp index 628e6d9f5..eef214f76 100644 --- a/include/mockturtle/networks/buffered.hpp +++ b/include/mockturtle/networks/buffered.hpp @@ -345,9 +345,93 @@ class buffered_mig_network : public mig_network #pragma region Restructuring // disable restructuring - std::optional> replace_in_node( node const& n, node const& old_node, signal new_signal ) = delete; - void replace_in_outputs( node const& old_node, signal const& new_signal ) = delete; - void take_out_node( node const& n ) = delete; + void replace_in_node( node const& n, node const& old_node, signal new_signal ) + { + assert( is_buf( old_node ) ); + auto& node = _storage->nodes[n]; + + if ( is_buf( n ) ) + { + assert( node.children[0].index == old_node ); + new_signal.complement ^= node.children[0].weight; + node.children[0] = new_signal; + node.children[1] = !new_signal; + _storage->nodes[new_signal.index].data[0].h1++; + return; + } + + uint32_t fanin = 3u; + for ( auto i = 0u; i < 3u; ++i ) + { + if ( node.children[i].index == old_node ) + { + fanin = i; + new_signal.complement ^= node.children[i].weight; + break; + } + } + assert( fanin < 3 ); + signal child2 = new_signal; + signal child1 = node.children[( fanin + 1 ) % 3]; + signal child0 = node.children[( fanin + 2 ) % 3]; + if ( child0.index > child1.index ) + { + std::swap( child0, child1 ); + } + if ( child1.index > child2.index ) + { + std::swap( child1, child2 ); + } + if ( child0.index > child1.index ) + { + std::swap( child0, child1 ); + } + + _storage->hash.erase( node ); + node.children[0] = child0; + node.children[1] = child1; + node.children[2] = child2; + _storage->hash[node] = n; + + // update the reference counter of the new signal + _storage->nodes[new_signal.index].data[0].h1++; + } + void replace_in_outputs( node const& old_node, signal const& new_signal ) + { + assert( !is_dead( old_node ) ); + + for ( auto& output : _storage->outputs ) + { + if ( output.index == old_node ) + { + output.index = 
new_signal.index; + output.weight ^= new_signal.complement; + + if ( old_node != new_signal.index ) + { + // increment fan-in of new node + _storage->nodes[new_signal.index].data[0].h1++; + } + } + } + } + void take_out_node( node const& n ) + { + assert( is_buf( n ) ); + + auto& nobj = _storage->nodes[n]; + nobj.data[0].h1 = UINT32_C( 0x80000000 ); /* fanout size 0, but dead */ + + for ( auto const& fn : _events->on_delete ) + { + ( *fn )( n ); + } + + if ( decr_fanout_size( nobj.children[0].index ) == 0 ) + { + take_out_node( nobj.children[0].index ); + } + } void substitute_node( node const& old_node, signal const& new_signal ) = delete; void substitute_nodes( std::list> substitutions ) = delete; #pragma endregion diff --git a/test/algorithms/aqfp/aqfp_retiming.cpp b/test/algorithms/aqfp/aqfp_retiming.cpp index e789438d2..28db8efe1 100644 --- a/test/algorithms/aqfp/aqfp_retiming.cpp +++ b/test/algorithms/aqfp/aqfp_retiming.cpp @@ -107,7 +107,7 @@ TEST_CASE( "aqfp retiming", "[aqfp_retiming]" ) asp.balance_pos = true; buffer_insertion_params ps; - ps.assume = asp; + ps.assume = legacy_to_realistic( asp ); ps.scheduling = buffer_insertion_params::ASAP; ps.optimization_effort = buffer_insertion_params::none; @@ -123,5 +123,8 @@ TEST_CASE( "aqfp retiming", "[aqfp_retiming]" ) CHECK( rst.buffers_pre == 57 ); CHECK( rst.buffers_post == 49 ); - CHECK( verify_aqfp_buffer( aqfp_ret, asp ) == true ); + std::vector pi_levels; + for ( auto i = 0u; i < aqfp_ret.num_pis(); ++i ) + pi_levels.emplace_back( 0 ); + CHECK( verify_aqfp_buffer( aqfp_ret, asp, pi_levels ) == true ); } diff --git a/test/algorithms/aqfp/buffer_insertion.cpp b/test/algorithms/aqfp/buffer_insertion.cpp index 6a3333857..c12e4fd89 100644 --- a/test/algorithms/aqfp/buffer_insertion.cpp +++ b/test/algorithms/aqfp/buffer_insertion.cpp @@ -11,217 +11,6 @@ using namespace mockturtle; -TEST_CASE( "buffer_insertion simple test", "[buffer_insertion]" ) -{ - mig_network mig; - auto const a = mig.create_pi(); - auto 
const b = mig.create_pi(); - auto const c = mig.create_pi(); - auto const d = mig.create_pi(); - auto const e = mig.create_pi(); - - auto const f1 = mig.create_maj( a, b, c ); - auto const f2 = mig.create_maj( d, e, f1 ); - auto const f3 = mig.create_maj( a, d, f1 ); - auto const f4 = mig.create_maj( f1, f2, f3 ); - mig.create_po( f4 ); - - buffer_insertion_params ps; - ps.assume.branch_pis = false; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - ps.assume.splitter_capacity = 4u; - ps.scheduling = buffer_insertion_params::ASAP; - ps.optimization_effort = buffer_insertion_params::none; - - buffer_insertion buffering( mig, ps ); - node_map levels{ mig }; - CHECK( buffering.dry_run( &levels ) == 2u ); - - CHECK( levels[f1] == 1u ); - CHECK( levels[f2] == 3u ); - CHECK( levels[f3] == 3u ); - CHECK( levels[f4] == 4u ); - CHECK( buffering.depth() == 4u ); - CHECK( buffering.num_buffers( mig.get_node( f1 ) ) == 2u ); - CHECK( buffering.num_buffers( mig.get_node( f2 ) ) == 0u ); - CHECK( buffering.num_buffers( mig.get_node( f3 ) ) == 0u ); - CHECK( buffering.num_buffers( mig.get_node( f4 ) ) == 0u ); -} - -TEST_CASE( "two layers of splitters", "[buffer_insertion]" ) -{ - mig_network mig; - auto const a = mig.create_pi(); - auto const b = mig.create_pi(); - auto const c = mig.create_pi(); - auto const d = mig.create_pi(); - auto const e = mig.create_pi(); - auto const f = mig.create_pi(); - auto const g = mig.create_pi(); - auto const h = mig.create_pi(); - auto const i = mig.create_pi(); - auto const j = mig.create_pi(); - - auto const f1 = mig.create_maj( a, b, c ); - auto const f2 = mig.create_maj( b, c, d ); - auto const f3 = mig.create_maj( d, e, f ); - auto const f4 = mig.create_maj( g, h, i ); - auto const f5 = mig.create_maj( h, i, j ); - - auto const f6 = mig.create_maj( f3, f4, f5 ); - auto const f7 = mig.create_maj( a, f1, f2 ); - auto const f8 = mig.create_maj( f2, f3, g ); - auto const f9 = mig.create_maj( f7, f2, f8 ); - auto const f10 = 
mig.create_maj( f8, f2, f5 ); - auto const f11 = mig.create_maj( f2, f8, f6 ); - auto const f12 = mig.create_maj( f9, f10, f11 ); - mig.create_po( f12 ); - - buffer_insertion_params ps; - ps.assume.branch_pis = false; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - ps.assume.splitter_capacity = 4u; - ps.scheduling = buffer_insertion_params::ASAP; - ps.optimization_effort = buffer_insertion_params::none; - - buffer_insertion buffering( mig, ps ); - CHECK( buffering.dry_run() == 17u ); - - CHECK( buffering.num_buffers( mig.get_node( f2 ) ) == 4u ); - CHECK( buffering.num_buffers( mig.get_node( f6 ) ) == 2u ); - CHECK( buffering.depth() == 7u ); -} - -TEST_CASE( "PO splitters, buffers and inverters", "[buffer_insertion]" ) -{ - mig_network mig; - auto const a = mig.create_pi(); - auto const b = mig.create_pi(); - auto const c = mig.create_pi(); - auto const d = mig.create_pi(); - - auto const f1 = mig.create_maj( a, b, c ); - auto const f2 = mig.create_maj( f1, c, d ); - mig.create_po( f1 ); - mig.create_po( !f1 ); - mig.create_po( f2 ); - mig.create_po( f2 ); - mig.create_po( !f2 ); - - buffer_insertion_params ps; - ps.assume.branch_pis = false; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - ps.assume.splitter_capacity = 4u; - ps.scheduling = buffer_insertion_params::ASAP; - ps.optimization_effort = buffer_insertion_params::none; - - buffer_insertion buffering( mig, ps ); - CHECK( buffering.dry_run() == 8u ); - - CHECK( buffering.depth() == 5u ); - CHECK( buffering.num_buffers( mig.get_node( f1 ) ) == 5u ); - CHECK( buffering.num_buffers( mig.get_node( f2 ) ) == 3u ); - - buffered_mig_network bufntk; - buffering.dump_buffered_network( bufntk ); - CHECK( verify_aqfp_buffer( bufntk, ps.assume ) == true ); -} - -TEST_CASE( "chain of fanouts", "[buffer_insertion]" ) -{ - mig_network mig; - auto const a = mig.create_pi(); - auto const b = mig.create_pi(); - auto const c = mig.create_pi(); - auto const d = mig.create_pi(); - auto 
const e = mig.create_pi(); - auto const f = mig.create_pi(); - auto const g = mig.create_pi(); - auto const h = mig.create_pi(); - auto const i = mig.create_pi(); - - auto const f1 = mig.create_maj( a, b, c ); - auto const f2 = mig.create_maj( f1, c, d ); - auto const f3 = mig.create_maj( f1, f2, e ); - auto const f4 = mig.create_maj( f1, f2, f ); - auto const f5 = mig.create_maj( f1, f3, f4 ); - auto const f6 = mig.create_maj( f1, f5, f ); - auto const f7 = mig.create_maj( f1, f2, g ); - auto const f8 = mig.create_maj( f1, f7, h ); - auto const f9 = mig.create_maj( f1, f7, i ); - mig.create_po( f1 ); - mig.create_po( f1 ); - mig.create_po( f1 ); - mig.create_po( f1 ); - mig.create_po( f1 ); - mig.create_po( f6 ); - mig.create_po( f8 ); - mig.create_po( f9 ); - - buffer_insertion_params ps; - ps.assume.branch_pis = false; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - ps.assume.splitter_capacity = 4u; - ps.scheduling = buffer_insertion_params::ASAP; - ps.optimization_effort = buffer_insertion_params::none; - - buffer_insertion buffering( mig, ps ); - CHECK( buffering.dry_run() == 11u ); - - CHECK( buffering.num_buffers( mig.get_node( f1 ) ) == 9u ); - CHECK( buffering.depth() == 8u ); -} - -TEST_CASE( "branch but not balance PIs", "[buffer_insertion]" ) -{ - mig_network mig; - auto const a = mig.create_pi(); - auto const b = mig.create_pi(); // shared - auto const c = mig.create_pi(); // shared - auto const d = mig.create_pi(); - auto const e = mig.create_pi(); // shared at higher level - auto const f = mig.create_pi(); // connects to two POs - - auto const f1 = mig.create_maj( a, b, c ); - auto const f2 = mig.create_maj( b, c, d ); - auto const f3 = mig.create_and( f1, e ); - auto const f4 = mig.create_and( f2, e ); - mig.create_po( f3 ); - mig.create_po( f4 ); - mig.create_po( f ); - mig.create_po( f ); - - buffer_insertion_params ps; - ps.assume.branch_pis = true; - ps.assume.balance_pis = false; - ps.assume.balance_pos = true; - 
ps.assume.splitter_capacity = 4u; - ps.scheduling = buffer_insertion_params::ALAP; - ps.optimization_effort = buffer_insertion_params::none; - - buffer_insertion buffering( mig, ps ); - node_map levels{ mig }; - CHECK( buffering.dry_run( &levels ) == 4u ); - - CHECK( buffering.level( mig.get_node( f1 ) ) == 2u ); - CHECK( buffering.level( mig.get_node( f2 ) ) == 2u ); - CHECK( buffering.level( mig.get_node( f3 ) ) == 3u ); - CHECK( buffering.level( mig.get_node( f4 ) ) == 3u ); - - CHECK( buffering.level( mig.get_node( a ) ) == 1u ); - CHECK( buffering.level( mig.get_node( b ) ) == 0u ); - CHECK( buffering.level( mig.get_node( c ) ) == 0u ); - CHECK( buffering.level( mig.get_node( d ) ) == 1u ); - CHECK( buffering.level( mig.get_node( e ) ) == 1u ); - CHECK( buffering.level( mig.get_node( f ) ) == 2u ); - - CHECK( buffering.depth() == 3u ); -} - TEST_CASE( "various assumptions", "[buffer_insertion]" ) { aig_network aig; @@ -245,135 +34,56 @@ TEST_CASE( "various assumptions", "[buffer_insertion]" ) aig.create_po( f3 ); aig.create_po( f4 ); - aqfp_assumptions asp; + aqfp_assumptions_realistic asp; asp.splitter_capacity = 2u; + asp.num_phases = 1u; + asp.ci_phases = {0}; buffer_insertion_params ps; ps.scheduling = buffer_insertion_params::ASAP; ps.optimization_effort = buffer_insertion_params::none; /* branch PI, balance PI and PO */ - asp.branch_pis = true; - asp.balance_pis = true; - asp.balance_pos = true; + asp.ci_capacity = 1; + asp.balance_cios = true; ps.assume = asp; { buffer_insertion buffering( aig, ps ); buffered_aig_network buffered; CHECK( buffering.run( buffered ) == 23u ); - CHECK( verify_aqfp_buffer( buffered, asp ) == true ); - } - - /* branch PI, balance only PI */ - asp.branch_pis = true; - asp.balance_pis = true; - asp.balance_pos = false; - ps.assume = asp; - { - buffer_insertion buffering( aig, ps ); - buffered_aig_network buffered; - CHECK( buffering.run( buffered ) == 11u ); - CHECK( verify_aqfp_buffer( buffered, asp ) == true ); - } - - /* 
branch PI, balance only PO */ - asp.branch_pis = true; - asp.balance_pis = false; - asp.balance_pos = true; - ps.assume = asp; - { - ps.scheduling = buffer_insertion_params::ASAP; - buffer_insertion buffering1( aig, ps ); - buffered_aig_network buffered1; - CHECK( buffering1.run( buffered1 ) == 23u ); - CHECK( verify_aqfp_buffer( buffered1, asp ) == true ); - - ps.scheduling = buffer_insertion_params::ALAP; - buffer_insertion buffering2( aig, ps ); - buffered_aig_network buffered2; - CHECK( buffering2.run( buffered2 ) == 11u ); - CHECK( verify_aqfp_buffer( buffered2, asp ) == true ); - - ps.scheduling = buffer_insertion_params::ASAP_depth; - buffer_insertion buffering3( aig, ps ); - buffered_aig_network buffered3; - CHECK( buffering3.run( buffered3 ) == 17u ); - CHECK( verify_aqfp_buffer( buffered3, asp ) == true ); - - ps.scheduling = buffer_insertion_params::ALAP_depth; - buffer_insertion buffering4( aig, ps ); - buffered_aig_network buffered4; - CHECK( buffering4.run( buffered4 ) == 10u ); - CHECK( verify_aqfp_buffer( buffered4, asp ) == true ); + CHECK( verify_aqfp_buffer( buffered, asp, buffering.pi_levels() ) == true ); } /* branch PI, balance neither */ - asp.branch_pis = true; - asp.balance_pis = false; - asp.balance_pos = false; + asp.ci_capacity = 1; + asp.balance_cios = false; ps.assume = asp; { ps.scheduling = buffer_insertion_params::ASAP; buffer_insertion buffering1( aig, ps ); buffered_aig_network buffered1; - CHECK( buffering1.run( buffered1 ) == 11u ); - CHECK( verify_aqfp_buffer( buffered1, asp ) == true ); + buffering1.run( buffered1 ); + CHECK( verify_aqfp_buffer( buffered1, asp, buffering1.pi_levels() ) == true ); ps.scheduling = buffer_insertion_params::ALAP; buffer_insertion buffering2( aig, ps ); buffered_aig_network buffered2; - CHECK( buffering2.run( buffered2 ) == 9u ); - CHECK( verify_aqfp_buffer( buffered2, asp ) == true ); + buffering2.run( buffered2 ); + CHECK( verify_aqfp_buffer( buffered2, asp, buffering2.pi_levels() ) == true ); 
ps.scheduling = buffer_insertion_params::ASAP_depth; buffer_insertion buffering3( aig, ps ); buffered_aig_network buffered3; - CHECK( buffering3.run( buffered3 ) == 8u ); - CHECK( verify_aqfp_buffer( buffered3, asp ) == true ); + buffering3.run( buffered3 ); + CHECK( buffering3.depth() == 4 ); + CHECK( verify_aqfp_buffer( buffered3, asp, buffering3.pi_levels() ) == true ); ps.scheduling = buffer_insertion_params::ALAP_depth; buffer_insertion buffering4( aig, ps ); buffered_aig_network buffered4; - CHECK( buffering4.run( buffered4 ) == 8u ); - CHECK( verify_aqfp_buffer( buffered4, asp ) == true ); - } - - /* don't branch PI, balance PO */ - asp.branch_pis = false; - asp.balance_pis = false; - asp.balance_pos = true; - ps.assume = asp; - { - ps.scheduling = buffer_insertion_params::ASAP; - buffer_insertion buffering1( aig, ps ); - buffered_aig_network buffered1; - CHECK( buffering1.run( buffered1 ) == 5u ); - CHECK( verify_aqfp_buffer( buffered1, asp ) == true ); - - ps.scheduling = buffer_insertion_params::ASAP_depth; - buffer_insertion buffering2( aig, ps ); - buffered_aig_network buffered2; - CHECK( buffering2.run( buffered2 ) == 5u ); - CHECK( verify_aqfp_buffer( buffered2, asp ) == true ); - } - - /* don't branch PI, balance neither */ - asp.branch_pis = false; - asp.balance_pis = false; - asp.balance_pos = false; - ps.assume = asp; - { - ps.scheduling = buffer_insertion_params::ASAP; - buffer_insertion buffering1( aig, ps ); - buffered_aig_network buffered1; - CHECK( buffering1.run( buffered1 ) == 2u ); - CHECK( verify_aqfp_buffer( buffered1, asp ) == true ); - - ps.scheduling = buffer_insertion_params::ASAP_depth; - buffer_insertion buffering2( aig, ps ); - buffered_aig_network buffered2; - CHECK( buffering2.run( buffered2 ) == 2u ); - CHECK( verify_aqfp_buffer( buffered2, asp ) == true ); + buffering4.run( buffered4 ); + CHECK( buffering4.depth() == 4 ); + CHECK( verify_aqfp_buffer( buffered4, asp, buffering4.pi_levels() ) == true ); } } @@ -395,7 +105,7 @@ 
TEST_CASE( "optimization with chunked movement", "[buffer_insertion]" ) auto const num_buf_asap = buffering.num_buffers(); auto const num_buf_opt = buffering.run( buffered_ntk ); - CHECK( verify_aqfp_buffer( buffered_ntk, ps.assume ) == true ); + CHECK( verify_aqfp_buffer( buffered_ntk, ps.assume, buffering.pi_levels() ) == true ); CHECK( num_buf_opt < num_buf_asap ); } #endif